import re import pytest from pandas._libs.tslibs import Timedelta, offsets, to_offset @pytest.mark.parametrize( "freq_input,expected", [ (to_offset("10us"), offsets.Micro(10)), (offsets.Hour(), offsets.Hour()), ("2h30min", offsets.Minute(150)), ("2h 30min", offsets.Minute(150)), ("2h30min15s", offsets.Second(150 * 60 + 15)), ("2h 60min", offsets.Hour(3)), ("2h 20.5min", offsets.Second(8430)), ("1.5min", offsets.Second(90)), ("0.5S", offsets.Milli(500)), ("15l500u", offsets.Micro(15500)), ("10s75L", offsets.Milli(10075)), ("1s0.25ms", offsets.Micro(1000250)), ("1s0.25L", offsets.Micro(1000250)), ("2800N", offsets.Nano(2800)), ("2SM", offsets.SemiMonthEnd(2)), ("2SM-16", offsets.SemiMonthEnd(2, day_of_month=16)), ("2SMS-14", offsets.SemiMonthBegin(2, day_of_month=14)), ("2SMS-15", offsets.SemiMonthBegin(2)), ], ) def test_to_offset(freq_input, expected):
def get_freq_code(freqstr: str) -> int:
    """
    Return the ``_period_dtype_code`` of the offset parsed from ``freqstr``.

    Parameters
    ----------
    freqstr : str
        Frequency string handed to ``to_offset``.

    Returns
    -------
    int
        The ``_period_dtype_code`` attribute of the resulting offset.
    """
    return to_offset(freqstr)._period_dtype_code
def _parsed_string_to_bounds(self, reso, parsed: Timedelta):
    """
    Compute the inclusive bounds implied by ``parsed`` at its own resolution.

    Parameters
    ----------
    reso : object
        Unused; accepted only to match the signature of DTI/PI.
    parsed : Timedelta
        The parsed value whose ``resolution_string`` drives the rounding.

    Returns
    -------
    tuple
        ``(lower, upper)`` where ``lower`` is ``parsed`` rounded to its
        resolution and ``upper`` is one resolution unit later, minus 1ns.
    """
    unit = parsed.resolution_string
    lower = parsed.round(unit)
    # Step one resolution unit forward, then back off by a single nanosecond
    # so that the upper bound is still inside the same unit.
    upper = lower + to_offset(unit) - Timedelta(1, "ns")
    return lower, upper
def test_to_offset_no_evaluate():
    """``to_offset`` must raise TypeError for a tuple of empty strings."""
    bad_input = ("", "")
    # The expected message pattern is the tuple's own string form.
    with pytest.raises(TypeError, match=str(bad_input)):
        to_offset(bad_input)
def _get_period_alias(freq) -> Optional[str]:
    """
    Look up the period alias for ``freq``.

    ``freq`` is converted with ``to_offset`` and the offset's ``rule_code``
    is passed to ``get_period_alias``, whose result is returned unchanged.
    """
    rule_code = to_offset(freq).rule_code
    return get_period_alias(rule_code)
def test_to_offset_pd_timedelta(kwargs, expected):
    """A Timedelta built from ``kwargs`` converts to the ``expected`` offset."""
    # see gh-9064
    delta = Timedelta(**kwargs)
    assert to_offset(delta) == expected
def test_to_offset(freq_input, expected):
    """``to_offset(freq_input)`` must compare equal to ``expected``."""
    assert to_offset(freq_input) == expected
def __truediv__(self, other):
    """
    Divide this array by ``other`` (``self / other``).

    The branch taken depends on what ``other`` is:

    * an instance of ``self._recognized_scalars``: converted to
      ``Timedelta``; if that is ``NaT`` a float64 array of NaN with
      ``self.shape`` is returned, otherwise ``self._ndarray / other``.
    * any other scalar (per ``lib.is_scalar``): assumed numeric; the
      underlying ndarray is divided and, when ``self.freq`` is set, the
      frequency is rescaled as well. The result is wrapped with
      ``type(self)._simple_new``.
    * anything without a ``dtype`` attribute (e.g. list, tuple) is first
      converted with ``np.array``; then
      - timedelta64 dtype: ``self._ndarray / other`` is returned as-is,
      - object dtype: element-wise division followed by dtype inference,
      - otherwise: the ndarray quotient wrapped with ``_simple_new``.

    Raises
    ------
    ValueError
        If ``other`` is array-like and its length differs from ``len(self)``.
    """
    # timedelta / X is well-defined for timedelta-like or numeric X

    if isinstance(other, self._recognized_scalars):
        other = Timedelta(other)
        # mypy assumes that __new__ returns an instance of the class
        # github.com/python/mypy/issues/1020
        if cast("Timedelta | NaTType", other) is NaT:
            # specifically timedelta64-NaT
            # Every element divided by NaT is NaN, so build the result directly.
            result = np.empty(self.shape, dtype=np.float64)
            result.fill(np.nan)
            return result

        # otherwise, dispatch to Timedelta implementation
        return self._ndarray / other

    elif lib.is_scalar(other):
        # assume it is numeric
        result = self._ndarray / other
        freq = None
        if self.freq is not None:
            # Tick division is not implemented, so operate on Timedelta
            freq = self.freq.delta / other
            freq = to_offset(freq)
        return type(self)._simple_new(result, dtype=result.dtype, freq=freq)

    if not hasattr(other, "dtype"):
        # e.g. list, tuple
        other = np.array(other)

    if len(other) != len(self):
        raise ValueError("Cannot divide vectors with unequal lengths")

    elif is_timedelta64_dtype(other.dtype):
        # let numpy handle it
        return self._ndarray / other

    elif is_object_dtype(other.dtype):
        # We operate on raveled arrays to avoid problems in inference
        #  on NaT
        # TODO: tests with non-nano
        srav = self.ravel()
        orav = other.ravel()
        # Divide element by element, then restore the original shape.
        result_list = [srav[n] / orav[n] for n in range(len(srav))]
        result = np.array(result_list).reshape(self.shape)

        # We need to do dtype inference in order to keep DataFrame ops
        #  behavior consistent with Series behavior
        inferred = lib.infer_dtype(result, skipna=False)
        if inferred == "timedelta":
            flat = result.ravel()
            result = type(self)._from_sequence(flat).reshape(result.shape)
        elif inferred == "floating":
            result = result.astype(float)
        elif inferred == "datetime":
            # GH#39750 this occurs when result is all-NaT, in which case
            #  we want to interpret these NaTs as td64.
            #  We construct an all-td64NaT result.
            # error: Incompatible types in assignment (expression has type
            # "TimedeltaArray", variable has type "ndarray[Any,
            # dtype[floating[_64Bit]]]")
            result = self * np.nan  # type: ignore[assignment]

        return result

    else:
        # Remaining array dtypes: divide the raw ndarray and re-wrap it.
        result = self._ndarray / other
        return type(self)._simple_new(result, dtype=result.dtype)
def _get_period_alias(freq: timedelta | BaseOffset | str) -> str | None:
    """
    Return the period alias that corresponds to ``freq``.

    Parameters
    ----------
    freq : timedelta, BaseOffset or str
        Anything ``to_offset`` accepts.

    Returns
    -------
    str or None
        Whatever ``get_period_alias`` returns for the offset's ``rule_code``.
    """
    offset = to_offset(freq)
    alias = get_period_alias(offset.rule_code)
    return alias
def test_to_offset_no_evaluate():
    """A tuple of empty strings must be rejected with "Could not evaluate"."""
    bad_input = ("", "")
    with pytest.raises(ValueError, match="Could not evaluate"):
        to_offset(bad_input)
def tradeDayOffset(self, today, n, freq='1d', incl_on_offset_today=False, **kwargs):
    """
    Shift date(s) by ``n`` steps of ``freq`` on the trading calendar.

    A positive ``n`` moves forward from ``today``; a negative ``n`` moves
    the other way.

    NOTE(review): the previous docstring said that ``n == 0`` returns the
    trading day one ``freq`` back from ``today`` and listed examples for it.
    The code raises ``ValueError`` for ``n == 0`` instead, so those examples
    are omitted here.

    Notes on ``incl_on_offset_today`` (translated from the original text):

    * ``True``: when ``today`` is on the offset, the shift starts from
      ``today``; when it is not, the shift starts from ``today +- offset``.
    * ``False``: the shift always starts from ``today +- offset``.

    Examples carried over from the original documentation (not verified
    here), given that 2017-08-18 is a trading day and 2017-08-20 is not::

        tradeDayOffset('2017-08-18', 1, freq='1d', incl_on_offset_today=False) -> 2017-08-21
        tradeDayOffset('2017-08-18', 1, freq='1d', incl_on_offset_today=True) -> 2017-08-18
        tradeDayOffset('2017-08-18', 2, freq='1d', incl_on_offset_today=True) -> 2017-08-21
        tradeDayOffset('2017-08-18', -1, freq='1d', incl_on_offset_today=True) -> 2017-08-18
        tradeDayOffset('2017-08-18', -2, freq='1d', incl_on_offset_today=True) -> 2017-08-17

    Parameters
    ----------
    today : date-like or non-string iterable of date-likes
        Starting date(s). A single value yields a single result; an
        iterable yields a ``DatetimeIndex``.
    n : int
        Number of steps; must be non-zero.
    freq : str, default '1d'
        Step frequency. It is upper-cased before being parsed with
        ``to_offset``; a trailing ``'S'`` or ``'d'`` is checked on the
        string as given.
    incl_on_offset_today : bool, default False
        See the notes above.
    **kwargs
        Accepted but not used.

    Raises
    ------
    ValueError
        If ``n == 0``.
    """
    if n == 0:
        raise ValueError(
            "absolute value of parameter 'n' must be positive!")

    if is_non_string_iterable(today):
        days = pd.to_datetime(today)
    else:
        days = pd.DatetimeIndex([pd.to_datetime(today)])
    original_days = days.copy()

    forward = n > 0
    # Checked on the raw string, before it is upper-cased for parsing.
    anchored_at_start = freq.endswith('S')
    offset = to_offset(freq.upper())

    # First snap the dates onto the offset, then onto the trading calendar.
    if anchored_at_start:
        days = days + offset - offset
        bdays = days - bday_chn_ashare + bday_chn_ashare
    else:
        days = days - offset + offset
        bdays = days + bday_chn_ashare - bday_chn_ashare

    # Snapping may already have consumed one step in the travel direction;
    # reduce the remaining step count for those dates.
    if forward:
        if incl_on_offset_today:
            already_moved = original_days <= bdays
        else:
            already_moved = original_days < bdays
        td = np.where(already_moved, n - 1, n)
    else:
        if incl_on_offset_today:
            not_moved = original_days < bdays
        else:
            not_moved = original_days <= bdays
        td = np.where(not_moved, n, n + 1)

    # Daily frequencies step in trading days; anything else steps in `offset`.
    step = bday_chn_ashare if freq.endswith('d') else offset
    shifted = np.where(td != 0, days + step * td, days)
    days = pd.DatetimeIndex(shifted.astype('datetime64[D]'))

    # Snap the shifted dates back onto the trading calendar.
    if anchored_at_start:
        days = days - bday_chn_ashare + bday_chn_ashare
    else:
        days = days + bday_chn_ashare - bday_chn_ashare

    if is_non_string_iterable(today):
        return days
    return days[0]
def test_get_to_timestamp_base(freqstr, exp_freqstr):
    """``get_to_timestamp_base`` maps ``freqstr``'s code to ``exp_freqstr``'s code."""
    source_code = to_offset(freqstr)._period_dtype_code
    target_code = to_offset(exp_freqstr)._period_dtype_code

    actual = get_to_timestamp_base(source_code)
    assert actual == target_code
def get_freq_code(freqstr: str) -> int:
    """Parse ``freqstr`` with ``to_offset`` and return its ``_period_dtype_code``."""
    # mypy reports: "BaseOffset" has no attribute "_period_dtype_code",
    # hence the ignore on the attribute access below.
    return to_offset(freqstr)._period_dtype_code  # type: ignore[attr-defined]
def test_extract_ordinals_raises(self):
    """``extract_ordinals`` must raise TypeError for non-object input."""
    # with non-object, make sure we raise TypeError, not segfault
    values = np.arange(5)
    daily = to_offset("D")

    with pytest.raises(TypeError, match="values must be object-dtype"):
        extract_ordinals(values, daily)
def test_to_offset_whitespace(freqstr, expected):
    """``to_offset(freqstr)`` must compare equal to ``expected``."""
    assert to_offset(freqstr) == expected
def _get_period_alias(freq) -> str | None:
    """Return ``get_period_alias`` applied to the ``rule_code`` of ``to_offset(freq)``."""
    return get_period_alias(to_offset(freq).rule_code)
def test_to_offset_leading_plus(freqstr, expected):
    """The multiple ``n`` of the parsed offset must equal ``expected``."""
    assert to_offset(freqstr).n == expected
def test_anchored_shortcuts(shortcut, expected):
    """``to_offset(shortcut)`` must compare equal to ``expected``."""
    assert to_offset(shortcut) == expected
def test_resolution_bumping(args, expected):
    """
    Check ``n`` and ``_prefix`` of the offset parsed from ``args``.

    ``args`` holds a count and a unit string that are concatenated into the
    frequency string; ``expected`` holds the expected ``n`` and ``_prefix``.
    """
    # see gh-14378
    freqstr = str(args[0]) + args[1]
    parsed = to_offset(freqstr)

    assert parsed.n == expected[0]
    assert parsed._prefix == expected[1]
def test_to_offset_negative(freqstr, expected):
    """The parsed offset's ``n`` must equal ``expected``."""
    offset = to_offset(freqstr)
    actual_n = offset.n
    assert actual_n == expected
def test_cat(args):
    """The frequency string built from ``args`` must be rejected as invalid."""
    with pytest.raises(ValueError, match="Invalid frequency"):
        freqstr = str(args[0]) + args[1]
        to_offset(freqstr)
def test_to_offset_tuple_unsupported():
    """Tuple input must raise TypeError asking for a string instead."""
    tuple_freq = (5, "T")
    with pytest.raises(TypeError, match="pass as a string instead"):
        to_offset(tuple_freq)
def test_compatibility(freqstr, expected):
    """Adding the parsed offset to a ``np.datetime64`` yields ``expected``."""
    base = np.datetime64("2021-01-01T08:00:00.00")
    offset = to_offset(freqstr)
    assert base + offset == np.datetime64(expected)
def interval_range(start=None, end=None, periods=None, freq=None, name=None, closed="right"):
    """
    Return a fixed frequency IntervalIndex.

    Parameters
    ----------
    start : numeric or datetime-like, default None
        Left bound for generating intervals.
    end : numeric or datetime-like, default None
        Right bound for generating intervals.
    periods : int, default None
        Number of periods to generate.
    freq : numeric, str, or DateOffset, default None
        The length of each interval. Must be consistent with the type of start
        and end, e.g. 2 for numeric, or '5H' for datetime-like.  Default is 1
        for numeric and 'D' for datetime-like.
    name : str, default None
        Name of the resulting IntervalIndex.
    closed : {'left', 'right', 'both', 'neither'}, default 'right'
        Whether the intervals are closed on the left-side, right-side, both
        or neither.

    Returns
    -------
    IntervalIndex

    Raises
    ------
    ValueError
        If the number of specified parameters among ``start``, ``end``,
        ``periods`` and ``freq`` is not exactly three, if ``start`` or
        ``end`` is not a valid endpoint, or if ``freq`` is neither numeric
        nor convertible to a DateOffset.
    TypeError
        If ``periods`` is not a number, or if ``start``, ``end`` and
        ``freq`` are not type compatible.

    See Also
    --------
    IntervalIndex : An Index of intervals that are all closed on the same side.

    Notes
    -----
    Of the four parameters ``start``, ``end``, ``periods``, and ``freq``,
    exactly three must be specified. If ``freq`` is omitted, the resulting
    ``IntervalIndex`` will have ``periods`` linearly spaced elements between
    ``start`` and ``end``, inclusively.

    To learn more about datetime-like frequency strings, please see `this link
    <https://pandas.pydata.org/pandas-docs/stable/user_guide/timeseries.html#offset-aliases>`__.

    Examples
    --------
    Numeric ``start`` and  ``end`` is supported.

    >>> pd.interval_range(start=0, end=5)
    IntervalIndex([(0, 1], (1, 2], (2, 3], (3, 4], (4, 5]],
                  closed='right', dtype='interval[int64]')

    Additionally, datetime-like input is also supported.

    >>> pd.interval_range(start=pd.Timestamp('2017-01-01'),
    ...                   end=pd.Timestamp('2017-01-04'))
    IntervalIndex([(2017-01-01, 2017-01-02], (2017-01-02, 2017-01-03],
                   (2017-01-03, 2017-01-04]],
                  closed='right', dtype='interval[datetime64[ns]]')

    The ``freq`` parameter specifies the frequency between the left and right
    endpoints of the individual intervals within the ``IntervalIndex``.  For
    numeric ``start`` and ``end``, the frequency must also be numeric.

    >>> pd.interval_range(start=0, periods=4, freq=1.5)
    IntervalIndex([(0.0, 1.5], (1.5, 3.0], (3.0, 4.5], (4.5, 6.0]],
                  closed='right', dtype='interval[float64]')

    Similarly, for datetime-like ``start`` and ``end``, the frequency must be
    convertible to a DateOffset.

    >>> pd.interval_range(start=pd.Timestamp('2017-01-01'),
    ...                   periods=3, freq='MS')
    IntervalIndex([(2017-01-01, 2017-02-01], (2017-02-01, 2017-03-01],
                   (2017-03-01, 2017-04-01]],
                  closed='right', dtype='interval[datetime64[ns]]')

    Specify ``start``, ``end``, and ``periods``; the frequency is generated
    automatically (linearly spaced).

    >>> pd.interval_range(start=0, end=6, periods=4)
    IntervalIndex([(0.0, 1.5], (1.5, 3.0], (3.0, 4.5], (4.5, 6.0]],
              closed='right',
              dtype='interval[float64]')

    The ``closed`` parameter specifies which endpoints of the individual
    intervals within the ``IntervalIndex`` are closed.

    >>> pd.interval_range(end=5, periods=4, closed='both')
    IntervalIndex([[1, 2], [2, 3], [3, 4], [4, 5]],
                  closed='both', dtype='interval[int64]')
    """
    start = maybe_box_datetimelike(start)
    end = maybe_box_datetimelike(end)
    # Whichever bound is available decides numeric vs datetime-like handling.
    endpoint = end if start is None else start

    # Fill in the default frequency only when something else is missing too.
    if freq is None and com.any_none(periods, start, end):
        freq = 1 if is_number(endpoint) else "D"

    if com.count_not_none(start, end, periods, freq) != 3:
        raise ValueError(
            "Of the four parameters: start, end, periods, and "
            "freq, exactly three must be specified"
        )

    if not _is_valid_endpoint(start):
        raise ValueError(f"start must be numeric or datetime-like, got {start}")
    if not _is_valid_endpoint(end):
        raise ValueError(f"end must be numeric or datetime-like, got {end}")

    if is_float(periods):
        periods = int(periods)
    elif periods is not None and not is_integer(periods):
        raise TypeError(f"periods must be a number, got {periods}")

    if freq is not None and not is_number(freq):
        try:
            freq = to_offset(freq)
        except ValueError as err:
            raise ValueError(
                f"freq must be numeric or convertible to DateOffset, got {freq}"
            ) from err

    # verify type compatibility (all three checks are evaluated up front)
    compatibility = (
        _is_type_compatible(start, end),
        _is_type_compatible(start, freq),
        _is_type_compatible(end, freq),
    )
    if not all(compatibility):
        raise TypeError("start, end, freq need to be type compatible")

    # +1 to convert interval count to breaks count (n breaks = n-1 intervals)
    if periods is not None:
        periods += 1

    if not is_number(endpoint):
        # delegate to the appropriate range function
        make_range = date_range if isinstance(endpoint, Timestamp) else timedelta_range
        breaks = make_range(start=start, end=end, periods=periods, freq=freq)
        return IntervalIndex.from_breaks(breaks, name=name, closed=closed)

    # force consistency between start/end/freq (lower end if freq skips it)
    if com.all_not_none(start, end, freq):
        end -= (end - start) % freq

    # compute the period/start/end if unspecified (at most one)
    if periods is None:
        periods = int((end - start) // freq) + 1
    elif start is None:
        start = end - (periods - 1) * freq
    elif end is None:
        end = start + (periods - 1) * freq

    breaks = np.linspace(start, end, periods)
    if all(is_integer(x) for x in com.not_none(start, end, freq)):
        # np.linspace always produces float output
        breaks = maybe_downcast_to_dtype(breaks, "int64")

    return IntervalIndex.from_breaks(breaks, name=name, closed=closed)
def get_freq_code(freqstr):
    """Return the ``_period_dtype_code`` of the offset parsed from ``freqstr``."""
    offset = to_offset(freqstr)
    return offset._period_dtype_code