def test_cusum_ols(self):
    """Compare the OLS-CUSUM break test with reference values from R."""
    # Reference values were generated in R with:
    #   library(strucchange)
    #   > sc = sctest(ginv ~ ggdp + lint, type="OLS-CUSUM")
    #   > mkhtest(sc, 'cusum_ols', 'BB')
    expected = {
        'statistic': 1.055750610401214,
        'pvalue': 0.2149567397376543,
        'parameters': (),
        'distr': 'BB',  # Brownian Bridge
    }

    # Passed as the degrees-of-freedom correction (`ddof`) of the test.
    n_params = 3
    observed = smsdia.breaks_cusumolsresid(self.res.resid, ddof=n_params)
    compare_t_est(observed, expected, decimal=(12, 12))
def test_cusum_ols(self):
    """Compare the OLS-CUSUM break test with reference values from R."""
    # Reference values were generated in R with:
    #   library(strucchange)
    #   > sc = sctest(ginv ~ ggdp + lint, type="OLS-CUSUM")
    #   > mkhtest(sc, 'cusum_ols', 'BB')
    expected = {
        'statistic': 1.055750610401214,
        'pvalue': 0.2149567397376543,
        'parameters': (),
        'distr': 'BB',  # Brownian Bridge
    }

    # Passed as the degrees-of-freedom correction (`ddof`) of the test.
    n_params = 3
    observed = smsdia.breaks_cusumolsresid(self.res.resid, ddof=n_params)
    compare_t_est(observed, expected, decimal=(12, 12))
def breaks_cumsum(resid: pd.Series, ddof=0):
    """
    Run the cumulative-sum (CUSUM) test for parameter stability on OLS
    residuals.

    Thin wrapper around ``diagnostic.breaks_cusumolsresid`` that keeps only
    the first two elements of its result and returns them as a named tuple.

    documentation:
    https://www.statsmodels.org/devel/generated/statsmodels.stats.diagnostic.breaks_cusumolsresid.html#statsmodels.stats.diagnostic.breaks_cusumolsresid

    see:
    * https://en.wikipedia.org/wiki/Structural_break
    * https://www.stata.com/features/overview/cumulative-sum-test/

    Null Hypothesis:
    There is no structural change. The test looks for 'breaks' (large
    changes over time in the parameter of interest) that would indicate
    structural instability in the series.

    Parameters
    ----------
    resid : pd.Series
        An array of residuals.
    ddof : int
        The number of parameters in the OLS estimation, used as degrees
        of freedom correction for error variance.

    Returns
    -------
    BreaksCumSumResult
        Named tuple with two fields:

        statistic : float
            The test statistic, maximum of absolute value of scaled
            cumulative OLS residuals.
        pvalue : float
            Probability of observing the data under the null hypothesis of
            no structural change, based on asymptotic distribution which
            is a Brownian Bridge.
    """
    BreaksCumSumResult = namedtuple('BreaksCumSumResult', 'statistic pvalue')
    outcome = diagnostic.breaks_cusumolsresid(resid, ddof=ddof)
    # Only the statistic and p-value are forwarded to the caller.
    sup_b, pval = outcome[0], outcome[1]
    return BreaksCumSumResult(statistic=sup_b, pvalue=pval)
def omission_test(model, crit=0.05, behavior='ANY', indices=None):
    """ Add omitted breakpoint into records based on residual stationarity

    For every time series segment record, fits an OLS regression per tested
    index and runs a CUSUM test on the OLS residuals
    (`statsmodels.stats.diagnostic.breaks_cusumolsresid`) to check whether
    the segment has omitted a "structural change" (e.g., land cover change).

    Args:
        model: Model object. This function reads its `test_indices`, `ran`,
            `record`, `dates`, `X` and `Y` attributes.
        crit (float, optional): Critical p-value for rejection of null
            hypothesis that data contain no structural change
        behavior (str, optional): Method for dealing with multiple
            `test_indices`. `ANY` will return True if any one test index
            rejects the null hypothesis. `ALL` will only return True if ALL
            test indices reject the null hypothesis. Case-insensitive.
        indices (np.ndarray, optional): Array indices to test. User provided
            indices must be a subset of `model.test_indices`. When omitted
            (or empty), all of `model.test_indices` are tested.

    Returns:
        np.ndarray: Array of True or False for each record where
            True indicates omitted break point. An empty boolean array is
            returned when `model.ran` is false.

    Raises:
        ValueError: If `behavior` is not "any"/"all", or if `indices` is not
            a subset of `model.test_indices`.

    """
    mode = behavior.lower()
    if mode not in ('any', 'all'):
        raise ValueError('`behavior` must be "any" or "all"')

    # `not indices` is ambiguous for multi-element arrays (raises) and is
    # True for a one-element array holding 0, so test for "not provided"
    # explicitly instead of relying on truthiness.
    if indices is None or np.size(indices) == 0:
        indices = model.test_indices

    if not np.all(np.isin(indices, model.test_indices)):
        raise ValueError('`indices` must be a subset of '
                         '`model.test_indices`')

    if not model.ran:
        return np.empty(0, dtype=bool)

    # Imported only once a regression is actually going to be fitted.
    from statsmodels.regression import linear_model
    from statsmodels.stats import diagnostic

    omission = np.zeros((model.record.size, len(indices)), dtype=bool)

    for i, r in enumerate(model.record):
        # Skip if no model fit
        if r['start'] == 0 or r['end'] == 0:
            continue

        # Find matching X and Y in data; start/end may come in either order
        first = min(r['start'], r['end'])
        last = max(r['start'], r['end'])
        index = np.where((model.dates >= first) & (model.dates <= last))[0]

        # Grab matching X and Y
        _X = model.X[index, :]
        _Y = model.Y[:, index]
        # Number of regressors, used as the test's ddof correction
        n_params = _X.shape[1]

        for i_b, b in enumerate(indices):
            # Create OLS regression
            ols = linear_model.OLS(_Y[b, :], _X).fit()
            # Perform CUMSUM test on residuals; element 1 is the p-value
            test = diagnostic.breaks_cusumolsresid(ols.resid, n_params)
            omission[i, i_b] = test[1] < crit

    # Collapse band answers according to `behavior`
    if mode == 'any':
        return np.any(omission, 1)
    return np.all(omission, 1)
def omission_test(model, crit=0.05, behavior='ANY', indices=None):
    """ Add omitted breakpoint into records based on residual stationarity

    For every time series segment record, fits an OLS regression per tested
    index and runs a CUSUM test on the OLS residuals
    (`statsmodels.stats.diagnostic.breaks_cusumolsresid`) to check whether
    the segment has omitted a "structural change" (e.g., land cover change).

    Args:
        model: Model object. This function reads its `test_indices`, `ran`,
            `record`, `dates`, `X` and `Y` attributes.
        crit (float, optional): Critical p-value for rejection of null
            hypothesis that data contain no structural change
        behavior (str, optional): Method for dealing with multiple
            `test_indices`. `ANY` will return True if any one test index
            rejects the null hypothesis. `ALL` will only return True if ALL
            test indices reject the null hypothesis. Case-insensitive.
        indices (np.ndarray, optional): Array indices to test. User provided
            indices must be a subset of `model.test_indices`. When omitted
            (or empty), all of `model.test_indices` are tested.

    Returns:
        np.ndarray: Array of True or False for each record where
            True indicates omitted break point. An empty boolean array is
            returned when `model.ran` is false.

    Raises:
        ValueError: If `behavior` is not "any"/"all", or if `indices` is not
            a subset of `model.test_indices`.

    """
    mode = behavior.lower()
    if mode not in ('any', 'all'):
        raise ValueError('`behavior` must be "any" or "all"')

    # `not indices` is ambiguous for multi-element arrays (raises) and is
    # True for a one-element array holding 0, so test for "not provided"
    # explicitly instead of relying on truthiness.
    if indices is None or np.size(indices) == 0:
        indices = model.test_indices

    if not np.all(np.isin(indices, model.test_indices)):
        raise ValueError('`indices` must be a subset of '
                         '`model.test_indices`')

    if not model.ran:
        return np.empty(0, dtype=bool)

    # Imported only once a regression is actually going to be fitted.
    from statsmodels.regression import linear_model
    from statsmodels.stats import diagnostic

    omission = np.zeros((model.record.size, len(indices)), dtype=bool)

    for i, r in enumerate(model.record):
        # Skip if no model fit
        if r['start'] == 0 or r['end'] == 0:
            continue

        # Find matching X and Y in data; start/end may come in either order
        first = min(r['start'], r['end'])
        last = max(r['start'], r['end'])
        index = np.where((model.dates >= first) & (model.dates <= last))[0]

        # Grab matching X and Y
        _X = model.X[index, :]
        _Y = model.Y[:, index]
        # Number of regressors, used as the test's ddof correction
        n_params = _X.shape[1]

        for i_b, b in enumerate(indices):
            # Create OLS regression
            ols = linear_model.OLS(_Y[b, :], _X).fit()
            # Perform CUMSUM test on residuals; element 1 is the p-value
            test = diagnostic.breaks_cusumolsresid(ols.resid, n_params)
            omission[i, i_b] = test[1] < crit

    # Collapse band answers according to `behavior`
    if mode == 'any':
        return np.any(omission, 1)
    return np.all(omission, 1)
def breaks_cumsumolsresid(self, timeseries):
    """Run the OLS-residual CUSUM break test on a model built from *timeseries*.

    Calls ``self.generate_model(timeseries)``, hands the second element of
    its result to ``diagnostic.breaks_cusumolsresid`` and returns the first
    two elements of that outcome as a ``BreaksCumSumResult`` named tuple
    with fields ``statistic`` and ``pvalue``.
    """
    # NOTE(review): statsmodels documents `breaks_cusumolsresid` as taking an
    # array of residuals (plus an optional `ddof`). Confirm that the second
    # value returned by `generate_model` is such an array; if it is a fitted
    # results object, `.resid` should presumably be passed here instead.
    _, model_result = self.generate_model(timeseries)
    outcome = diagnostic.breaks_cusumolsresid(model_result)
    BreaksCumSumResult = namedtuple('BreaksCumSumResult', 'statistic pvalue')
    return BreaksCumSumResult(statistic=outcome[0], pvalue=outcome[1])
def _compare_after_smoothing(series, compare, zero_percent_threshold):
    """Smooth *series* and pass it to *compare*, retrying with zeros filled.

    If smoothing or comparing raises ``ValueError``, and the share of exact
    zeros in *series* is below *zero_percent_threshold*, the zeros are
    forward-filled and smoothing is attempted once more.
    """
    try:
        smoothed = smooth_feature(series)
        # Only compare a finite, non-constant smoothed series. The previous
        # `.all()` form could never be true (the first element always
        # equals itself), which made this branch dead code.
        if np.isfinite(smoothed).all() and (smoothed.iloc[0] != smoothed).any():
            compare(smoothed)
    except ValueError:
        zero_percent = (series == 0).astype(int).sum(axis=0) / len(series)
        if zero_percent < zero_percent_threshold:
            series = series.replace(to_replace=0, method='ffill')
            smoothed = smooth_feature(series)
            if check_smoothed_feature(smoothed):
                compare(smoothed)


def analyze_cross_correlation_timeseries(df, col_one, col_two, time_column,
                                         significance_threshold=0.05,
                                         zero_percent_threshold=0.05):
    """Compare two columns of *df* as time series, choosing the method from
    unit-root test results.

    Both columns are indexed by *time_column* and stripped of missing
    values. A CUSUM check is printed for each series, then the p-values of
    ``adfuller`` decide what happens next:

    * both below the threshold: the series are compared directly;
    * exactly one above the threshold: that series is smoothed first and
      then compared with the other one;
    * both above the threshold: a cointegration test (``coint``) is run and
      its outcome is printed.

    Parameters
    ----------
    df : DataFrame holding the two columns and the time column.
    col_one, col_two : column labels of the two series to compare.
    time_column : column label whose values become the index of both series.
    significance_threshold : float
        p-value cut-off used for every test in this function.
    zero_percent_threshold : float
        Maximum share of exact zeros for which zeros are forward-filled
        before a second smoothing attempt.

    Returns
    -------
    None
        Results are reported through ``compare_timeseries`` and ``print``.
    """
    series_one = df[col_one].copy()
    series_two = df[col_two].copy()
    series_one.index = df[time_column].copy()
    series_two.index = df[time_column].copy()
    series_one = series_one.dropna()
    series_two = series_two.dropna()

    # NOTE(review): a p-value above the threshold means the CUSUM test does
    # not reject its null hypothesis; confirm "failed" is the intended
    # wording for that outcome.
    if breaks_cusumolsresid(series_one)[1] > significance_threshold:
        print("cumulative sum test failed for feature")
    if breaks_cusumolsresid(series_two)[1] > significance_threshold:
        print("cumulative sum test failed for display")

    # Run each unit-root test once instead of once per branch condition.
    # (Assumes `adfuller` is the augmented Dickey-Fuller test, whose second
    # result element is the p-value -- TODO confirm against the import.)
    pvalue_one = adfuller(series_one)[1]
    pvalue_two = adfuller(series_two)[1]
    one_below = pvalue_one < significance_threshold
    one_above = pvalue_one > significance_threshold
    two_below = pvalue_two < significance_threshold
    two_above = pvalue_two > significance_threshold

    if one_below and two_below:
        # Neither series needs smoothing: compare them as they are.
        compare_timeseries(series_one, series_two)
    elif one_above and two_below:
        # Only series_one needs smoothing before the comparison.
        _compare_after_smoothing(
            series_one,
            lambda smoothed: compare_timeseries(smoothed, series_two),
            zero_percent_threshold,
        )
    elif one_below and two_above:
        # Only series_two needs smoothing before the comparison.
        _compare_after_smoothing(
            series_two,
            lambda smoothed: compare_timeseries(series_one, smoothed),
            zero_percent_threshold,
        )
    elif one_above and two_above:
        # Both series fail the unit-root test, therefore use cointegration.
        # Keep the whole result: element 0 is reported as the t-statistic
        # and element 1 is the p-value used for the decision.
        cointegration_results = coint(series_one, series_two)
        t_statistic = cointegration_results[0]
        p_value = cointegration_results[1]
        if p_value < significance_threshold:
            print(
                f"\nThe t-statistic of the unit-root test {t_statistic},\n"
                f"The pvalue {p_value}\n"
                f"is less than signifiance threshold of "
                f"{significance_threshold},\n"
                "So we reject the null hypothesis.\n"
                "And therefore, we believe there is cointegration "
                "(a relationship) between the two series.\n"
            )
        else:
            print(
                f"\nThe t-statistic of the unit-root test {t_statistic},\n"
                f"The pvalue {p_value}\n"
                f"is greater than signifiance threshold of "
                f"{significance_threshold},\n"
                "So we fail to reject the null hypothesis.\n"
                "And therefore, we believe there is no relation between "
                "the series.\n"
            )
# Plot the recursive CUSUM path together with its two bound series
# (index 0 is used as the lower bound and index 1 as the upper bound in
# the rejection check below).
plt.plot(rcusum)
plt.plot(rcusumci[0])
plt.plot(rcusumci[1])
# Second figure: the recursive residuals and their absolute values.
plt.figure()
plt.plot(rresid)
plt.plot(np.abs(rresid))

# The CUSUM test rejects if the path leaves the band at any point.
print('cusum test reject:')
print(((rcusum[1:] > rcusumci[1]) | (rcusum[1:] < rcusumci[0])).any())

# Recompute through `recursive_olsresiduals` and check that the recursive
# parameter estimates agree with the earlier ones from index `skip` onward.
rresid2, rparams2, rypred2, rresid_standardized2, rresid_scaled2, rcusum2, rcusumci2 = \
    recursive_olsresiduals(res1, skip)
#assert_almost_equal(rparams[skip+1:], rparams2[skip:-1],13)
assert_almost_equal(rparams[skip:], rparams2[skip:], 13)
#np.c_[rparams[skip+1:], rparams2[skip:-1]]
#plt.show()

#################### Example break test
# Hansen break test: prints the statistic and the value unpacked as crit95.
H, crit95 = breaks_hansen(res1)
print(H)
print(crit95)

# CUSUM test on the OLS residuals; the three returned values are unpacked
# as supb, pval and crit.
supb, pval, crit = breaks_cusumolsresid(res1.resid)
print(supb, pval, crit)

##check whether this works directly: Ploberger/Kramer framing of standard cusum
##no, it's different, there is another denominator
#print breaks_cusumolsresid(rresid[skip:])
#this function is still completely wrong, cut and paste does not apply
#print breaks_cusum(rresid[skip:])