def get_table_dyn(S: pd.Index, n_tot: int, max_length=100):
    """Fill dynamic-programming tables of best scores and cut points.

    Every window of consecutive occurrences ``S[ia], ..., S[iz]`` with at
    least 3 and at most ``min(len(S), max_length)`` elements gets an entry
    keyed by the inclusive positions ``(ia, iz)``. Its score is the smaller
    of the cost of covering the whole window with one cycle and the cheapest
    way of splitting it in two parts; parts shorter than 3 occurrences are
    charged ``score_one`` per occurrence.

    Parameters
    ----------
    S: pd.Index or np.ndarray
        Occurrences. Needs ``len``, ``.max()`` and ``.min()`` and must be
        accepted by ``np.diff`` and ``sliding_window_view``.
        NOTE(review): windows of size 3 are built unconditionally, so at
        least 3 occurrences are presumably required -- confirm with callers.
    n_tot: int
        total number of occurrences in the original events
    max_length: int, default=100
        maximum number of occurrences for a cycle to cover; it is capped at
        ``len(S)``.

    Returns
    -------
    scores: dict
        Maps ``(ia, iz)`` to the best score found for that window.
    cut_points: dict
        Maps ``(ia, iz)`` to how that score is obtained: ``None`` when the
        window is kept as a single cycle, ``-1`` for a window of 3 whose
        cycle cost exceeded ``3 * score_one``, otherwise the position ``im``
        of the last occurrence of the left part (the right part starts at
        ``im + 1``).
    """
    n_occ = len(S)
    diffs = np.diff(S)
    triples = sliding_window_view(S, 3)
    diff_pairs = sliding_window_view(diffs, 2)
    dS = S.max() - S.min()

    # Cost charged for one occurrence that is not covered by a cycle.
    # NOTE(original author): "1 really ?" -- the count passed here is an
    # open question inherited from the original code.
    score_one = residual_length(1, n_tot, dS)

    # Base case: windows of exactly 3 occurrences. The builtin ``sum`` adds
    # together the items returned by ``cycle_length``; the result is indexed
    # with a boolean mask below, so it is expected to be a numpy array with
    # one entry per window.
    scores = sum(cycle_length(triples, diff_pairs, n_occ, dS))
    change = scores > 3 * score_one
    scores[change] = 3 * score_one  # in-place: cap at three single costs
    cut_points = np.array([-1] * len(scores), dtype=object)
    cut_points[~change] = None

    scores = {(i, i + 2): score for i, score in enumerate(scores)}
    cut_points = dict(zip(scores.keys(), cut_points))

    max_length = min(n_occ, max_length)
    # Grow the window size; every sub-window of 3..k-1 occurrences looked up
    # below was stored by an earlier iteration.
    for k in range(4, max_length + 1):
        w = sliding_window_view(S, k)
        _diffs = sliding_window_view(diffs, k - 1)
        _s = sum(cycle_length(w, _diffs, n_occ, dS))

        # ``best_score`` starts as the cost of one cycle over the window.
        for ia, best_score in enumerate(_s):
            cut_point = None
            iz = ia + k - 1
            # Try every split: left part is ia..im, right part is im+1..iz.
            for im in range(ia, iz):
                if im - ia + 1 < 3:
                    score_left = score_one * (im - ia + 1)
                else:
                    score_left = scores[(ia, im)]
                if iz - im < 3:
                    score_right = score_one * (iz - im)
                else:
                    score_right = scores[(im + 1, iz)]

                # Strict comparison: on ties the earlier candidate is kept.
                if score_left + score_right < best_score:
                    best_score = score_left + score_right
                    cut_point = im
            scores[(ia, iz)] = best_score
            cut_points[(ia, iz)] = cut_point

    return scores, cut_points
def _check_cutoffs_fh_y(
    cutoffs: VALID_CUTOFF_TYPES, fh: FORECASTING_HORIZON_TYPES, y: pd.Index
) -> None:
    """Validate that `cutoffs`, `fh` and `y` can be used together.

    Two type combinations are accepted: the largest cutoff and the largest
    `fh` value are both integers, or the largest cutoff is a datetime and
    the largest `fh` value is a timedelta.

    Parameters
    ----------
    cutoffs : np.array or pd.Index
        Cutoff points; the largest one is taken with ``np.max``.
    fh : forecasting horizon
        Must provide a ``max()`` method; its result type has to match the
        type of `cutoffs` as described above.
    y : pd.Index
        Index of time series

    Raises
    ------
    ValueError
        In the integer case, if max cutoff plus max `fh` is greater than
        ``y.shape[0]``; in the datetime case, if it is greater than
        ``y.max()``.
    TypeError
        if `cutoffs` and `fh` type combination is not supported
    """
    latest_cutoff = np.max(cutoffs)
    furthest_step = fh.max()

    # Classify the input pair first; anything outside the two supported
    # combinations is rejected before any arithmetic is attempted.
    integer_case = is_int(x=latest_cutoff) and is_int(x=furthest_step)
    if not integer_case and not (
        is_datetime(x=latest_cutoff) and is_timedelta(x=furthest_step)
    ):
        raise TypeError("Unsupported type of `cutoffs` and `fh`")

    horizon_end = latest_cutoff + furthest_step
    # Integer inputs are compared with the length of `y`, datetime inputs
    # with its largest value.
    upper_bound = y.shape[0] if integer_case else y.max()
    if horizon_end > upper_bound:
        raise ValueError("`fh` is incompatible with given `cutoffs` and `y`.")