def test_sliding_window_splitter_with_initial_window(
    y, fh, window_length, step_length, initial_window
):
    """Test SlidingWindowSplitter when an initial window is supplied.

    For supported input combinations the first training window must have the
    length of ``initial_window``, all later training windows the length of
    ``window_length``, and every test window one entry per horizon step.
    For unsupported combinations the constructor must raise ``TypeError``.
    """
    if not _inputs_are_supported([fh, initial_window, window_length, step_length]):
        # Unsupported type mix: construction itself has to fail.
        with pytest.raises(TypeError, match="Unsupported combination of types"):
            SlidingWindowSplitter(
                fh=fh,
                initial_window=initial_window,
                window_length=window_length,
                step_length=step_length,
                start_with_window=True,
            )
        return

    splitter = SlidingWindowSplitter(
        fh=fh,
        window_length=window_length,
        step_length=step_length,
        initial_window=initial_window,
        start_with_window=True,
    )
    train_windows, test_windows, _, n_splits = _check_cv(splitter, y)

    # The very first training window follows `initial_window` ...
    first_window_size = train_windows[0].shape[0]
    assert first_window_size == _coerce_duration_to_int(
        duration=initial_window, freq="D"
    )

    # ... while the remaining ones follow `window_length`.
    later_windows_shape = np.vstack(train_windows[1:]).shape
    assert later_windows_shape == (
        n_splits - 1,
        _coerce_duration_to_int(duration=window_length, freq="D"),
    )

    test_windows_shape = np.vstack(test_windows).shape
    assert test_windows_shape == (n_splits, len(check_fh(fh)))
def to_relative(self, cutoff=None):
    """Return the forecasting horizon with values relative to ``cutoff``.

    Parameters
    ----------
    cutoff : pd.Period, pd.Timestamp, int, optional (default=None)
        Cutoff value is required to convert a relative forecasting horizon
        to an absolute one and vice versa.

    Returns
    -------
    fh : ForecastingHorizon
        Relative representation of forecasting horizon
    """
    # Nothing to convert when the horizon is already relative.
    if self.is_relative:
        return self._new()

    self._check_cutoff(cutoff)
    offsets = self.to_pandas() - cutoff

    # For period/datetime indices the subtraction yields durations, which are
    # turned into integers using the unit derived from the cutoff.
    time_like = isinstance(self.to_pandas(), (pd.PeriodIndex, pd.DatetimeIndex))
    if time_like:
        offsets = _coerce_duration_to_int(offsets, unit=_get_unit(cutoff))

    return self._new(offsets, is_relative=True)
def to_absolute_int(self, start, cutoff=None):
    """Return absolute values as zero-based integer index starting from `start`.

    Parameters
    ----------
    start : pd.Period, pd.Timestamp, int
        Start value returned as zero.
    cutoff : pd.Period, pd.Timestamp, int, optional (default=None)
        Cutoff value required to convert a relative forecasting horizon to an
        absolute one (and vice versa).

    Returns
    -------
    fh : ForecastingHorizon
        Absolute representation of forecasting horizon as zero-based
        integer index.
    """
    # Only `start` is validated in this method; `cutoff` is handed to
    # `to_absolute`, which is where it gets used for the conversion.
    horizon = self.to_absolute(cutoff).to_pandas()
    _check_start(start, horizon)

    # Note: ideally values would be coerced to periods first (as done in
    # `to_relative`) for more reliable arithmetic, but that currently does not
    # work with `update_predict` and incomplete time indices where the `freq`
    # information is lost, see comment on issue #534.
    offsets = horizon - start

    if not isinstance(horizon, (pd.PeriodIndex, pd.DatetimeIndex)):
        return self._new(offsets, is_relative=False)

    # Time-like indices yield durations; convert them to plain integers.
    offsets = _coerce_duration_to_int(offsets, freq=_get_freq(cutoff))
    return self._new(offsets, is_relative=False)
def _to_relative(self, y):
    """Return the index of ``y`` relative to the first entry of ``self._y_index``.

    Datetime indices are coerced to periods before subtracting, and the
    resulting durations are coerced to integers.
    """
    index = y.index
    cutoff = self._y_index[0]
    _check_cutoff(cutoff, index)

    if isinstance(index, pd.DatetimeIndex):
        # The freq of the index itself may be None for non-regular indices,
        # so the freq of the cutoff is used instead.
        cutoff_freq = _get_freq(cutoff)

        # Periods give reliable arithmetic and time-delta computations.
        index = _coerce_to_period(index, cutoff_freq)
        cutoff = _coerce_to_period(cutoff, cutoff_freq)

    # Relative values are the distance of every entry from the cutoff.
    offsets = index - cutoff

    if not isinstance(index, (pd.PeriodIndex, pd.DatetimeIndex)):
        return offsets

    # Durations (time deltas) become integer counts for the given frequency.
    return _coerce_duration_to_int(offsets, freq=_get_freq(cutoff))
def test_sliding_window_splitter_start_with_empty_window(
    y, fh, window_length, step_length
):
    """Test SlidingWindowSplitter with ``start_with_window=False``.

    For supported inputs, the test windows must have one entry per horizon
    step and, once the leading incomplete windows are dropped, all training
    windows must have the length of ``window_length``. For unsupported inputs
    the constructor must raise ``TypeError``.
    """
    if not _inputs_are_supported([fh, window_length, step_length]):
        with pytest.raises(TypeError, match="Unsupported combination of types"):
            SlidingWindowSplitter(
                fh=fh,
                initial_window=None,
                window_length=window_length,
                step_length=step_length,
                start_with_window=False,
            )
        return

    splitter = SlidingWindowSplitter(
        fh=fh,
        window_length=window_length,
        step_length=step_length,
        start_with_window=False,
    )
    train_windows, test_windows, _, n_splits = _check_cv(
        splitter, y, allow_empty_window=True
    )

    test_windows_shape = np.vstack(test_windows).shape
    assert test_windows_shape == (n_splits, len(check_fh(fh)))

    # Skip the leading windows that are not yet full-length.
    n_incomplete = _get_n_incomplete_windows(window_length, step_length)
    complete_windows = train_windows[n_incomplete:]

    complete_shape = np.vstack(complete_windows).shape
    assert complete_shape == (
        n_splits - n_incomplete,
        _coerce_duration_to_int(duration=window_length, freq="D"),
    )
def test_single_window_splitter(y, fh, window_length):
    """Test SingleWindowSplitter.

    For supported inputs exactly one split is expected, the training window
    must have the length of ``window_length`` and the test window must match
    the horizon applied to the last training position. For unsupported inputs
    the constructor must raise ``TypeError``.
    """
    if not _inputs_are_supported([fh, window_length]):
        with pytest.raises(TypeError, match="Unsupported combination of types"):
            SingleWindowSplitter(fh=fh, window_length=window_length)
        return

    splitter = SingleWindowSplitter(fh=fh, window_length=window_length)
    train_windows, test_windows, _cutoffs, n_splits = _check_cv(splitter, y)
    train_idx, test_idx = train_windows[0], test_windows[0]

    assert n_splits == 1
    assert train_idx.shape[0] == _coerce_duration_to_int(
        duration=window_length, freq="D"
    )

    horizon = check_fh(fh)
    assert test_idx.shape[0] == len(horizon)

    last_train = train_idx[-1]
    if array_is_int(horizon):
        # Integer horizon: plain positional offsets from the last train index.
        expected_test = last_train + horizon
    else:
        # Non-integer horizon: shift the last training label by each step and
        # look up the resulting position in the index of `y`.
        anchor = y.index[last_train]
        expected_test = np.array(
            [y.index.get_loc(anchor + step) for step in horizon]
        )

    np.testing.assert_array_equal(test_idx, expected_test)
def _to_relative(fh: ForecastingHorizon, cutoff=None) -> ForecastingHorizon:
    """Return forecasting horizon values relative to a cutoff.

    Parameters
    ----------
    fh : ForecastingHorizon
    cutoff : pd.Period, pd.Timestamp, int, optional (default=None)
        Cutoff value required to convert a relative forecasting horizon to an
        absolute one (and vice versa).

    Returns
    -------
    fh : ForecastingHorizon
        Relative representation of forecasting horizon.
    """
    # Already relative: no conversion required.
    if fh.is_relative:
        return fh._new()

    horizon_index = fh.to_pandas()
    _check_cutoff(cutoff, horizon_index)

    # The freq of the ForecastingHorizon itself (or its wrapped
    # pd.DatetimeIndex) may be None for non-regular indices, so the freq of
    # the cutoff is used instead.
    cutoff_freq = _get_freq(cutoff)

    if isinstance(horizon_index, pd.DatetimeIndex):
        # Periods give reliable arithmetic and time-delta computations.
        horizon_index = _coerce_to_period(horizon_index, cutoff_freq)
        cutoff = _coerce_to_period(cutoff, cutoff_freq)

    # TODO: Replace when we upgrade our lower pandas bound
    #  to a version where this is fixed
    # Relative values are computed element by element to circumvent a bug in
    # pandas:
    # periods = pd.period_range(start="2021-01-01", periods=3, freq="2H")
    # periods - periods[0]
    # Out: Index([<0 * Hours>, <4 * Hours>, <8 * Hours>], dtype = 'object')
    # [v - periods[0] for v in periods]
    # Out: Index([<0 * Hours>, <2 * Hours>, <4 * Hours>], dtype='object')
    # TODO: v0.12.0: Check if this comment below can be removed,
    #  so check if pandas has released the fix to PyPI:
    # This bug was reported: https://github.com/pandas-dev/pandas/issues/45999
    # and fixed: https://github.com/pandas-dev/pandas/pull/46006
    # Most likely it will be released with pandas 1.5
    # Once the bug is fixed the line should simply be:
    # relative = absolute - cutoff
    deltas = pd.Index([entry - cutoff for entry in horizon_index])

    # Durations (time deltas) become integer counts for the given frequency.
    if isinstance(horizon_index, (pd.PeriodIndex, pd.DatetimeIndex)):
        deltas = _coerce_duration_to_int(deltas, freq=cutoff_freq)

    return fh._new(deltas, is_relative=True)
def test_coerce_duration_to_int(duration):
    """Test coercion of duration to int.

    Checks that the coerced value is integer-typed and, depending on the kind
    of ``duration`` passed in, that it has the expected value.
    """
    ret = _coerce_duration_to_int(duration, unit=_get_unit(duration))

    # check output type is always integer; `isinstance` is used because no
    # NumPy scalar has the exact type `np.integer` (it is an abstract base),
    # so an exact-type membership test could never accept NumPy integers.
    assert isinstance(ret, (pd.Int64Index, np.integer, int))

    # check result
    if isinstance(duration, pd.Index):
        np.testing.assert_array_equal(ret, range(3))

    if isinstance(duration, pd.tseries.offsets.BaseOffset):
        # This comparison was previously a bare expression and therefore
        # never verified anything.
        assert ret == 3
def to_absolute_int(self, start, cutoff=None):
    """Return absolute values as zero-based integer index starting from `start`.

    Parameters
    ----------
    start : pd.Period, pd.Timestamp, int
        Start value returned as zero.
    cutoff : pd.Period, pd.Timestamp, int, optional (default=None)
        Cutoff value required to convert a relative forecasting horizon to an
        absolute one (and vice versa).

    Returns
    -------
    fh : ForecastingHorizon
        Absolute representation of forecasting horizon as zero-based
        integer index.
    """
    freq = _get_freq(cutoff)

    # Timestamps are coerced to periods for reliable arithmetic operations
    # and computations of time deltas.
    if isinstance(cutoff, pd.Timestamp):
        cutoff = _coerce_to_period(cutoff, freq=freq)

    horizon = self.to_absolute(cutoff).to_pandas()
    if isinstance(horizon, pd.DatetimeIndex):
        horizon = _coerce_to_period(horizon, freq=freq)

    # Only `start` is validated here; `cutoff` is handed to `to_absolute`
    # above, which is where it gets used for the conversion.
    if isinstance(start, pd.Timestamp):
        start = _coerce_to_period(start, freq=freq)
    _check_start(start, horizon)

    # NOTE(review): an earlier note at this point stated that coercing to
    # periods (as in `to_relative`) does not work with `update_predict` and
    # incomplete time indices where the `freq` information is lost (see
    # comment on issue #534). This method does coerce to periods above, so
    # confirm whether that caveat still applies.

    # Subtracting element by element circumvents a bug in pandas:
    # periods = pd.period_range(start="2021-01-01", periods=3, freq="2H")
    # periods - periods[0]
    # Out: Index([<0 * Hours>, <4 * Hours>, <8 * Hours>], dtype = 'object')
    # [v - periods[0] for v in periods]
    # Out: Index([<0 * Hours>, <2 * Hours>, <4 * Hours>], dtype='object')
    offsets = pd.Index([entry - start for entry in horizon])

    if not isinstance(horizon, (pd.PeriodIndex, pd.DatetimeIndex)):
        return self._new(offsets, is_relative=False)

    # The freq is looked up again from the (possibly coerced) cutoff.
    offsets = _coerce_duration_to_int(offsets, freq=_get_freq(cutoff))
    return self._new(offsets, is_relative=False)
def test_expanding_window_splitter(y, fh, initial_window, step_length):
    """Test ExpandingWindowSplitter.

    Every test window must have one entry per horizon step, the first
    training window must have the length of ``initial_window``, and the
    training windows must pass ``_check_expanding_windows``.
    """
    splitter = ExpandingWindowSplitter(
        fh=fh,
        initial_window=initial_window,
        step_length=step_length,
        start_with_window=True,
    )
    train_windows, test_windows, _, n_splits = _check_cv(splitter, y)

    test_windows_shape = np.vstack(test_windows).shape
    assert test_windows_shape == (n_splits, len(check_fh(fh)))

    first_window_size = train_windows[0].shape[0]
    assert first_window_size == _coerce_duration_to_int(
        duration=initial_window, freq="D"
    )

    _check_expanding_windows(train_windows)
def test_coerce_duration_to_int(duration):
    """Test coercion of duration to int.

    Checks that the coerced value is integer-typed and, depending on the kind
    of ``duration`` passed in, that it has the expected value.
    """
    ret = _coerce_duration_to_int(duration, freq=_get_freq(duration))

    # check output type is always integer; `isinstance` is used because no
    # NumPy scalar has the exact type `np.integer` (it is an abstract base),
    # so an exact-type membership test could never accept NumPy integers.
    assert isinstance(ret, (np.integer, int)) or is_integer_index(ret)

    # check result
    if isinstance(duration, pd.Index):
        np.testing.assert_array_equal(ret, range(3))

    if isinstance(duration, pd.tseries.offsets.BaseOffset):
        assert ret == 3
def test_single_window_splitter(y, fh, window_length):
    """Test SingleWindowSplitter.

    Exactly one split is expected; the training window must have the length
    of ``window_length`` and the test window must equal the horizon added to
    the last training position.
    """
    splitter = SingleWindowSplitter(fh=fh, window_length=window_length)
    train_windows, test_windows, _cutoffs, n_splits = _check_cv(splitter, y)
    train_idx, test_idx = train_windows[0], test_windows[0]

    assert n_splits == 1

    train_size = train_idx.shape[0]
    assert train_size == _coerce_duration_to_int(duration=window_length, freq="D")

    test_size = test_idx.shape[0]
    assert test_size == len(check_fh(fh))

    # Test positions are the horizon steps counted from the last train index.
    expected_test = train_idx[-1] + check_fh(fh)
    np.testing.assert_array_equal(test_idx, expected_test)
def test_sliding_window_splitter_with_initial_window(
    y, fh, window_length, step_length, initial_window
):
    """Test SlidingWindowSplitter when an initial window is supplied.

    The test is skipped when ``initial_window`` and ``window_length`` are
    incompatible. Otherwise the first training window must have the length of
    ``initial_window``, all later training windows the length of
    ``window_length``, and every test window one entry per horizon step.
    """
    incompatible = _windows_are_incompatible(initial_window, window_length)
    if incompatible:
        pytest.skip(
            "Incompatible initial_window and window_length are tested elsewhere."
        )

    splitter = SlidingWindowSplitter(
        fh=fh,
        window_length=window_length,
        step_length=step_length,
        initial_window=initial_window,
        start_with_window=True,
    )
    train_windows, test_windows, _, n_splits = _check_cv(splitter, y)

    # The very first training window follows `initial_window` ...
    first_window_size = train_windows[0].shape[0]
    assert first_window_size == _coerce_duration_to_int(
        duration=initial_window, freq="D"
    )

    # ... while the remaining ones follow `window_length`.
    later_windows_shape = np.vstack(train_windows[1:]).shape
    assert later_windows_shape == (
        n_splits - 1,
        _coerce_duration_to_int(duration=window_length, freq="D"),
    )

    test_windows_shape = np.vstack(test_windows).shape
    assert test_windows_shape == (n_splits, len(check_fh(fh)))
def test_sliding_window_splitter(y, fh, window_length, step_length):
    """Test SlidingWindowSplitter.

    All training windows must have the length of ``window_length`` and every
    test window must have one entry per horizon step.
    """
    splitter = SlidingWindowSplitter(
        fh=fh,
        window_length=window_length,
        step_length=step_length,
        start_with_window=True,
    )
    train_windows, test_windows, _, n_splits = _check_cv(splitter, y)

    train_windows_shape = np.vstack(train_windows).shape
    assert train_windows_shape == (
        n_splits,
        _coerce_duration_to_int(duration=window_length, freq="D"),
    )

    test_windows_shape = np.vstack(test_windows).shape
    assert test_windows_shape == (n_splits, len(check_fh(fh)))
def to_relative(self, cutoff=None):
    """Return forecasting horizon values relative to a cutoff.

    Parameters
    ----------
    cutoff : pd.Period, pd.Timestamp, int, optional (default=None)
        Cutoff value required to convert a relative forecasting horizon to an
        absolute one (and vice versa).

    Returns
    -------
    fh : ForecastingHorizon
        Relative representation of forecasting horizon.
    """
    # Already relative: no conversion required.
    if self.is_relative:
        return self._new()

    horizon_index = self.to_pandas()
    _check_cutoff(cutoff, horizon_index)

    if isinstance(horizon_index, pd.DatetimeIndex):
        # The freq of the ForecastingHorizon itself (or its wrapped
        # pd.DatetimeIndex) may be None for non-regular indices, so the freq
        # of the cutoff is used instead.
        cutoff_freq = _get_freq(cutoff)

        # Periods give reliable arithmetic and time-delta computations.
        horizon_index = _coerce_to_period(horizon_index, cutoff_freq)
        cutoff = _coerce_to_period(cutoff, cutoff_freq)

    # Relative values are the distance of every entry from the cutoff.
    offsets = horizon_index - cutoff

    # Durations (time deltas) become integer counts for the given frequency.
    if isinstance(horizon_index, (pd.PeriodIndex, pd.DatetimeIndex)):
        offsets = _coerce_duration_to_int(offsets, freq=_get_freq(cutoff))

    return self._new(offsets, is_relative=True)
def test_sliding_window_splitter_start_with_empty_window(
    y, fh, window_length, step_length
):
    """Test SlidingWindowSplitter with ``start_with_window=False``.

    The test windows must have one entry per horizon step and, once the
    leading incomplete windows are dropped, all training windows must have
    the length of ``window_length``.
    """
    splitter = SlidingWindowSplitter(
        fh=fh,
        window_length=window_length,
        step_length=step_length,
        start_with_window=False,
    )
    train_windows, test_windows, _, n_splits = _check_cv(
        splitter, y, allow_empty_window=True
    )

    test_windows_shape = np.vstack(test_windows).shape
    assert test_windows_shape == (n_splits, len(check_fh(fh)))

    # Skip the leading windows that are not yet full-length.
    n_incomplete = _get_n_incomplete_windows(window_length, step_length)
    complete_windows = train_windows[n_incomplete:]

    complete_shape = np.vstack(complete_windows).shape
    assert complete_shape == (
        n_splits - n_incomplete,
        _coerce_duration_to_int(duration=window_length, freq="D"),
    )
def to_absolute_int(self, start, cutoff=None):
    """Return absolute values as zero-based integer index.

    Parameters
    ----------
    start : pd.Period, pd.Timestamp, int
        Start value
    cutoff : pd.Period, pd.Timestamp, int, optional (default=None)
        Cutoff value is required to convert a relative forecasting horizon
        to an absolute one and vice versa.

    Returns
    -------
    fh : ForecastingHorizon
        Absolute representation of forecasting horizon as zero-based
        integer index
    """
    # `start` is validated with the same check that is used for cutoffs.
    self._check_cutoff(start)

    horizon = self.to_absolute(cutoff).to_pandas()
    offsets = horizon - start

    if not isinstance(horizon, (pd.PeriodIndex, pd.DatetimeIndex)):
        return self._new(offsets, is_relative=False)

    # Time-like indices yield durations; convert them to integers using the
    # unit derived from the cutoff.
    offsets = _coerce_duration_to_int(offsets, unit=_get_unit(cutoff))
    return self._new(offsets, is_relative=False)
def _get_n_incomplete_windows(window_length, step_length) -> int:
    """Return the window length divided by the step length, rounded up.

    Both arguments are first coerced to integers with ``freq="D"``.
    """
    window_size = _coerce_duration_to_int(duration=window_length, freq="D")
    step_size = _coerce_duration_to_int(duration=step_length, freq="D")
    ratio = window_size / step_size
    return int(np.ceil(ratio))
def _get_step_length(x: NON_FLOAT_WINDOW_LENGTH_TYPES) -> int:
    """Return ``x`` coerced to an integer with ``freq="D"``."""
    step_length = _coerce_duration_to_int(duration=x, freq="D")
    return step_length
def test_coerce_duration_to_int_with_non_allowed_durations(duration):
    """Test that coercing a non-allowed duration raises ``ValueError``.

    The error message is expected to mention that the frequency is missing.
    """
    expected_error = pytest.raises(ValueError, match="frequency is missing")
    with expected_error:
        freq = _get_freq(duration)
        _coerce_duration_to_int(duration, freq=freq)