def test_from_model(self):
    """Check that ``from_estimation`` and ``from_coeffs`` build equal processes.

    An ARMA(1, 1) sample is simulated with a fixed seed, an ARMA(1, 1) model
    is fitted to it, and the process built from the fit result is compared
    attribute by attribute with the process built from the fitted AR/MA
    parameters.
    """
    n_obs = 1000
    true_process = ArmaProcess([1, -.8], [1, .3], 1000)
    rng = np.random.RandomState(12345)
    sample = true_process.generate_sample(
        n_obs, burnin=100, distrvs=rng.standard_normal
    )

    fitted = ARMA(sample, (1, 1)).fit(disp=False)

    from_result = ArmaProcess.from_estimation(fitted)
    from_params = ArmaProcess.from_coeffs(
        fitted.arparams, fitted.maparams, n_obs
    )

    # Both construction routes must describe the same process.
    assert_equal(from_result.arcoefs, from_params.arcoefs)
    assert_equal(from_result.macoefs, from_params.macoefs)
    assert_equal(from_result.nobs, from_params.nobs)
    assert_equal(from_result.isinvertible, from_params.isinvertible)
    assert_equal(from_result.isstationary, from_params.isstationary)
def test_from_model(self):
    """Verify ``ArmaProcess.from_estimation`` against ``ArmaProcess.from_coeffs``.

    Simulates a seeded ARMA(1, 1) series, fits ARMA(1, 1) to it, and asserts
    that the two alternate constructors yield processes with identical
    coefficients, sample size, invertibility and stationarity.
    """
    sample_size = 1000
    generator = ArmaProcess([1, -.8], [1, .3], 1000)
    random_state = np.random.RandomState(12345)
    simulated = generator.generate_sample(
        sample_size, burnin=100, distrvs=random_state.standard_normal
    )
    fit_result = ARMA(simulated, (1, 1)).fit(disp=False)

    estimated = ArmaProcess.from_estimation(fit_result)
    rebuilt = ArmaProcess.from_coeffs(
        fit_result.arparams, fit_result.maparams, sample_size
    )

    # Compare the attributes in the same order as the individual checks.
    compared_attributes = (
        "arcoefs",
        "macoefs",
        "nobs",
        "isinvertible",
        "isstationary",
    )
    for attribute in compared_attributes:
        assert_equal(getattr(estimated, attribute), getattr(rebuilt, attribute))
def test_from_estimation(d, seasonal):
    """Check the coefficient shapes returned by ``ArmaProcess.from_estimation``.

    A series of 500 quarterly observations is simulated (and cumulatively
    summed when ``d == 1``), an ARIMA model with order ``(1, d, 1)`` and an
    optional ``(1, 0, 1, 4)`` seasonal part is fitted, and the AR/MA
    coefficient arrays of the resulting process are checked for the expected
    length: 5 with the seasonal part, 1 without.
    """
    n_obs = 500
    if seasonal:
        ar_coefs = [0.8, 0, 0, 0.2, -0.16]
        ma_coefs = [0.4, 0, 0, 0.2, -0.08]
    else:
        ar_coefs = [0.8]
        ma_coefs = [0.4]

    source_process = ArmaProcess.from_coeffs(ar_coefs, ma_coefs, 500)
    quarters = pd.date_range(dt.datetime(1900, 1, 1), periods=500, freq="Q")
    sample = source_process.generate_sample(n_obs)
    if d == 1:
        # Integrate once so that first differencing recovers the ARMA sample.
        sample = np.cumsum(sample)
    observed = pd.Series(sample, index=quarters)

    if seasonal:
        seasonal_spec = (1, 0, 1, 4)
        expected_shape = (5,)
    else:
        seasonal_spec = None
        expected_shape = (1,)

    fitted = ARIMA(observed, order=(1, d, 1), seasonal_order=seasonal_spec).fit()
    recovered = ArmaProcess.from_estimation(fitted)

    assert recovered.arcoefs.shape == expected_shape
    assert recovered.macoefs.shape == expected_shape
def early_warnings_null_hypothesis(
    series,
    indicators=None,
    roll_window=0.4,
    smooth="Lowess",
    span=0.1,
    band_width=0.2,
    lag_times=None,
    n_simulations=1000,
):
    """
    Estimate the significance of an early-warnings analysis with a null
    hypothesis test.

    An ARMA(p, q) model (p, q in {0, 1}, selected by lowest AIC) is fitted to
    the detrended series. Surrogate time series are then simulated from the
    fitted process, and for each surrogate the trend of every early warning
    indicator is measured with the nonparametric Kendall tau rank
    correlation against time. Comparing the Kendall tau of the original data
    with the surrogate distribution gives the probability of a false
    positive.

    Parameters
    ----------
    series : pandas Series
        Time series observations.
    indicators : list of strings, optional
        The statistics (leading indicators) for which the analysis is
        performed. Recognised values are 'sd', 'var', 'ac', 'cv', 'skew'
        and 'kurt'. Defaults to ``["var", "ac"]``.
    roll_window : float
        Rolling window size as a proportion of the length of the time-series
        data.
    smooth : string
        Type of detrending. It can be {'Gaussian', 'Lowess', 'None'}.
    span : float
        Span of time-series data used for Lowess filtering. Taken as a
        proportion of time-series length if in (0,1), otherwise taken as
        absolute.
    band_width : float
        Bandwidth of Gaussian kernel. Taken as a proportion of time-series
        length if in (0,1), otherwise taken as absolute.
    lag_times : list of int, optional
        List of lag times at which to compute autocorrelation. Defaults to
        ``[1]``.
    n_simulations : int
        The number of surrogate data sets. Default is 1000.

    Returns
    -------
    DataFrame
        One row per data set with the Kendall tau rank correlation of each
        indicator. The first row is the original data (``true_data`` is
        True); the remaining ``n_simulations`` rows are the surrogates
        (``true_data`` is False).

    Raises
    ------
    ValueError
        If none of the candidate ARMA fits yields a comparable (non-NaN)
        AIC.
    """
    # Fresh lists per call: the defaults are handed to external code, so a
    # shared mutable default could leak state between calls.
    indicators = ["var", "ac"] if indicators is None else indicators
    lag_times = [1] if lag_times is None else lag_times

    ews_dic = ewstools.core.ews_compute(
        series,
        roll_window=roll_window,
        smooth=smooth,
        span=span,
        band_width=band_width,
        ews=indicators,
        lag_times=lag_times,
    )

    # Prefer the current ARIMA implementation; the legacy
    # ``statsmodels.tsa.arima_model`` module is deprecated/removed in recent
    # statsmodels releases, so it is only used as a fallback.
    try:
        from statsmodels.tsa.arima.model import ARIMA
    except ImportError:
        from statsmodels.tsa.arima_model import ARIMA
    from statsmodels.tsa.arima_process import ArmaProcess

    # Use the residuals of the detrending when they are available.
    # NOTE(review): with smooth='None' no detrending is requested and the
    # "Residuals" entry is presumably absent, so the input series itself is
    # evaluated in that case -- confirm against ewstools.
    ews_metrics = ews_dic["EWS metrics"]
    eval_series = ews_metrics["Residuals"] if "Residuals" in ews_metrics else series

    # Fit ARMA(p, q) candidates and keep the one with the lowest AIC.
    # The ARIMA order is (p, d, q): d stays 0 because the series is already
    # detrended and the surrogates are drawn from a pure ARMA process.
    best_aic = float("inf")
    result = None
    for ar_order in range(2):
        for ma_order in range(2):
            model_fit = ARIMA(eval_series, order=(ar_order, 0, ma_order)).fit()
            aic = model_fit.aic
            print("AR", "MA", "AIC")
            print(ar_order, ma_order, aic)
            if aic < best_aic:
                best_aic = aic
                result = model_fit
    if result is None:
        raise ValueError(
            "No ARMA model could be selected: every candidate fit returned "
            "a non-comparable AIC."
        )

    process = ArmaProcess.from_estimation(result)

    # Run simulations on the best fitted ARMA process and get Kendall taus.
    n_obs = len(eval_series)
    surrogate_frames = [
        _rolling_kendall_tau(
            pd.Series(process.generate_sample(n_obs)),
            indicators,
            roll_window,
            lag_times,
        )
        for _ in range(n_simulations)
    ]

    # Get results for the true data.
    data_kendall_tau_df = _rolling_kendall_tau(
        eval_series, indicators, roll_window, lag_times
    )
    data_kendall_tau_df["true_data"] = True

    if not surrogate_frames:
        # pd.concat rejects an empty list; with no surrogates only the
        # observed row can be reported.
        return data_kendall_tau_df

    surrogates_kendall_tau_df = pd.concat(surrogate_frames)
    surrogates_kendall_tau_df["true_data"] = False

    # Return dataframe with both true data and surrogates.
    return pd.concat([data_kendall_tau_df, surrogates_kendall_tau_df])


def _rolling_kendall_tau(series, indicators, roll_window, lag_times):
    """
    Compute rolling-window indicators of ``series`` and return a one-row
    DataFrame holding the Kendall tau of each indicator against time.

    Based on the rolling window computation of ewstools.core.ews_compute.
    """
    df_ews = pd.DataFrame()

    # Compute the rolling window size (integer value).
    rw_size = int(np.floor(roll_window * series.shape[0]))

    # ------------ Compute temporal EWS ---------------#
    if "sd" in indicators:
        df_ews["Standard deviation"] = series.rolling(window=rw_size).std()

    if "var" in indicators:
        df_ews["Variance"] = series.rolling(window=rw_size).var()

    # Autocorrelation for each requested lag.
    if "ac" in indicators:
        for lag in lag_times:
            df_ews["Lag-" + str(lag) + " AC"] = series.rolling(
                window=rw_size
            ).apply(
                # Bind the lag as a default so the callable does not depend
                # on the loop variable.
                func=lambda x, lag=lag: pd.Series(x).autocorr(lag=lag),
                raw=True,
            )

    # Coefficient of variation: rolling std divided by rolling mean, both
    # taken from the same series.
    if "cv" in indicators:
        roll_mean = series.rolling(window=rw_size).mean()
        roll_std = series.rolling(window=rw_size).std()
        df_ews["Coefficient of variation"] = roll_std.divide(roll_mean)

    if "skew" in indicators:
        df_ews["Skewness"] = series.rolling(window=rw_size).skew()

    if "kurt" in indicators:
        df_ews["Kurtosis"] = series.rolling(window=rw_size).kurt()

    # ------------ Compute Kendall tau coefficients ----------------#
    # Kendall correlation of each EWS with respect to time. Values close to
    # one indicate high correlation (EWS increasing with time), values close
    # to zero indicate no significant correlation, and values close to
    # negative one indicate high negative correlation (EWS decreasing with
    # time).

    # Put time values as their own series for correlation computation.
    time_vals = pd.Series(df_ews.index, index=df_ews.index)

    # EWS that can be used for Kendall tau computation.
    candidate_metrics = {
        "Variance",
        "Standard deviation",
        "Skewness",
        "Kurtosis",
        "Coefficient of variation",
        "Smax",
        "Smax/Var",
        "Smax/Mean",
    } | {"Lag-" + str(lag) + " AC" for lag in lag_times}

    # Keep the computed EWS in column order so the output layout is
    # deterministic across data sets.
    ktau_metrics = [
        name for name in df_ews.columns.values.tolist() if name in candidate_metrics
    ]

    dic_ktau = {
        name: df_ews[name].corr(time_vals, method="kendall")
        for name in ktau_metrics
    }
    # Single-row DataFrame (easier for concatenation purposes).
    return pd.DataFrame(dic_ktau, index=[0])