# Unit-root diagnostics: plot the series and its PACF, compare AIC/BIC across
# AR lag orders, then run ADF and Phillips-Perron tests.
import matplotlib.pyplot as plt
from statsmodels.graphics.tsaplots import plot_pacf
from statsmodels.tsa.ar_model import AutoReg, ar_select_order
from arch.unitroot import ADF, PhillipsPerron


def unitroot_test(series):
    # Basic statistics
    plt.figure()
    plt.plot(series)
    plot_pacf(series)

    # AIC & BIC for lag orders 12 down to 1 (printed as LaTeX table rows)
    print('$p$ & AIC & BIC \\\\')
    max_lags = 12
    for lags in (max_lags - i for i in range(max_lags)):
        ar_model = AutoReg(series, lags, 'n')
        res = ar_model.fit()
        print(f'{lags} & {round(res.aic, 3)} & {round(res.bic, 3)} \\\\')

    # Best lags by `ar_select_order`
    sel = ar_select_order(series, max_lags, trend='n')
    lags = sel.ar_lags[-1]
    print(f'Lags selection: {sel.ar_lags}')

    # ADF test with the selected number of lags
    adf = ADF(series, lags)
    print(adf.summary())

    # PP test
    pp_tau = PhillipsPerron(series, 3, test_type='tau')  # q = 3
    pp_rho = PhillipsPerron(series, 3, test_type='rho')  # q = 3
    print(pp_tau.summary())
    print(pp_rho.summary())
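
# A minimal usage sketch: a synthetic stationary AR(1) series (coefficient and
# length are made up for illustration), on which the ADF test should reject a
# unit root.
import numpy as np

np.random.seed(0)
n = 500
e = np.random.standard_normal(n)
x = np.zeros(n)
for t in range(1, n):
    x[t] = 0.7 * x[t - 1] + e[t]
unitroot_test(x)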
def sample(self, lagged_values, lagged_times=None, **ignored):
    """Inspect the unique values to see if outcomes are discrete or continuous."""
    uniques = np.unique(lagged_values)
    # Arbitrary cutoff of 20% to decide whether outcomes are continuous or quantized
    if len(uniques) < 0.2 * len(lagged_values):
        # Randomly select from the lagged values and return as the answer
        v = [s for s in np.random.choice(lagged_values, self.num_predictions)]
    else:
        # Our data arrive in reverse order; the AR model needs the opposite
        rev_values = lagged_values[::-1]
        # Simple autoregression
        ARmodel = ar_select_order(rev_values, maxlag=int(0.1 * len(rev_values)))
        model_fit = ARmodel.model.fit()
        # Take the scalar one-step-ahead forecast
        point_est = model_fit.predict(start=len(rev_values), end=len(rev_values),
                                      dynamic=False)[0]
        st_dev = np.std(rev_values)
        # v = [s for s in np.random.normal(point_est, st_dev, self.num_predictions)]
        # Spread the predictions out evenly over +/- two standard deviations
        v = [s for s in np.linspace(start=point_est - 2 * st_dev,
                                    stop=point_est + 2 * st_dev,
                                    num=self.num_predictions)]
    print(*v, sep=", ")
    return v
def model(columna, n_periods):
    mod = ar_select_order(columna.ravel(), maxlag=15, old_names=True)
    AutoRegfit = AutoReg(columna, trend='c', lags=mod.ar_lags, old_names=True).fit()
    prediccion = AutoRegfit.predict(start=len(columna),
                                    end=len(columna) + n_periods - 1,
                                    dynamic=False)
    return prediccion
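
# Minimal usage sketch (the series below is synthetic, made up for illustration).
import numpy as np

serie = np.cumsum(np.random.standard_normal(120))
print(model(serie, n_periods=6))  # six out-of-sample forecasts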
def get_geweke_diags(chains, split=0.3, skip=0.5):
    """Compute the Geweke convergence statistic for Markov chains."""
    # Check dimensionality of chains; if a single chain, add a dimension
    n_dims = len(chains.shape)
    if n_dims == 2:
        chains = np.expand_dims(chains, axis=0)

    # Compute split demarcations as integers to be used for indexing
    n_floor = int(chains.shape[1] * (split + skip))
    n_skip = int(chains.shape[1] * skip)

    # Initialize the vector in which we store z-scores
    z_scores = np.zeros(chains.shape[0] * chains.shape[2])

    # Main loop that computes the statistics of interest
    for i in range(chains.shape[0]):
        for j in range(chains.shape[2]):
            # Get autoregression coefficients for each part of the split chain
            sel_1 = ar_select_order(chains[i, n_skip:n_floor, j], maxlag=10, seasonal=False)
            sel_2 = ar_select_order(chains[i, n_floor:, j], maxlag=10, seasonal=False)
            res_1 = sel_1.model.fit()
            res_2 = sel_2.model.fit()

            # Compute the autoregression-corrected respective variances
            s_1 = res_1.sigma2 / np.square(1 - np.sum(res_1.params[1:]))
            s_2 = res_2.sigma2 / np.square(1 - np.sum(res_2.params[1:]))

            # Compute absolute z-scores that form the basis of the test of
            # whether or not to continue sampling
            z_scores[i * chains.shape[2] + j] = np.abs(
                (np.mean(chains[i, n_skip:n_floor, j]) - np.mean(chains[i, n_floor:, j]))
                / np.sqrt((1 / (n_floor - n_skip)) * s_1
                          + (1 / (chains.shape[1] - n_floor)) * s_2))

    # Continuation check: continue_ is 1 if any absolute z-score exceeds 2
    # (keep sampling), and 0 if all are below 2 (stop sampling)
    continue_ = int(np.sum(z_scores > 2) > 0)
    return continue_, z_scores
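
# Minimal usage sketch (assumption: chains shaped (n_chains, n_samples, n_params);
# the synthetic chains below are i.i.d. noise, so sampling should stop).
import numpy as np

np.random.seed(1)
chains = np.random.standard_normal((2, 1000, 3))
continue_, z = get_geweke_diags(chains)
print(continue_, z)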
def ar_model(data):
    from statsmodels.tsa.ar_model import AutoReg, ar_select_order
    sel = ar_select_order(data["GMSLNA"], 20, old_names=False, seasonal=True, period=12)
    res = sel.model.fit()
    return res
def test_ar_order_select():
    # GH#2118
    np.random.seed(12345)
    y = arma_generate_sample([1, -0.75, 0.3], [1], 100)
    ts = Series(y, index=date_range(start="1/1/1990", periods=100, freq="M"))
    res = ar_select_order(ts, maxlag=12, ic="aic")
    assert tuple(res.ar_lags) == (1, 2)
    assert isinstance(res.aic, dict)
    assert isinstance(res.bic, dict)
    assert isinstance(res.hqic, dict)
    assert isinstance(res.model, AutoReg)
    assert not res.seasonal
    assert res.trend == "c"
    assert res.period is None
def fit_ar(x):
    # Cap the maximum lag at min(n, 10 * log10(n)) and select the order by AIC
    n = len(x)
    ar_selection = ar_select_order(
        x,
        min(int(np.floor(n)), int(np.floor(10 * np.log10(n)))),
        ic='aic', trend='n', seasonal=False)
    order = ar_selection.ar_lags[-1]
    model_fit = ar_selection.model.fit()
    return model_fit.params, model_fit.sigma2, order
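
# Minimal usage sketch (synthetic AR(2) data, made up for illustration).
import numpy as np

np.random.seed(2)
n = 400
x = np.zeros(n)
e = np.random.standard_normal(n)
for t in range(2, n):
    x[t] = 0.5 * x[t - 1] - 0.25 * x[t - 2] + e[t]
params, sigma2, order = fit_ar(x)
print(order, sigma2)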
def sample(self, lagged_values, lagged_times=None, **ignored):
    """Sample predictions from a simple AR fit to the lagged values."""
    # Our data arrive in reverse order; the AR model needs the opposite
    rev_values = lagged_values[::-1]
    ARmodel = ar_select_order(rev_values, maxlag=int(0.1 * len(rev_values)))
    model_fit = ARmodel.model.fit()
    point_est = model_fit.predict(start=len(rev_values), end=len(rev_values),
                                  dynamic=False)[0]
    st_dev = np.std(rev_values)
    # Map evenly spaced percentiles through the normal quantile function
    ps = self.evenly_spaced_percentiles(self.num_predictions)
    vs = [point_est + st_dev * self.norminv(p) for p in ps]
    # Jiggle all points by a small common offset to break ties; the indices
    # 113/114 assume num_predictions is large enough (e.g., 225)
    jiggle = 0.2 * abs(vs[114] - vs[113]) * np.random.rand()
    v_jiggled = [v + jiggle for v in vs]
    return v_jiggled
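
# `evenly_spaced_percentiles` and `norminv` are not defined in this snippet; a
# plausible sketch of these assumed helpers (hypothetical implementations,
# using SciPy for the normal quantile) follows.
from scipy.stats import norm


def evenly_spaced_percentiles(self, n):
    # n percentiles strictly inside (0, 1): 1/(2n), 3/(2n), ..., (2n-1)/(2n)
    return [(2 * i + 1) / (2 * n) for i in range(n)]


def norminv(self, p):
    # Standard normal quantile (inverse CDF)
    return norm.ppf(p)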
def rem_lags(csv_name):
    """Analyze the remainder series and select its AR lags.

    Parameters
    ----------
    csv_name : str
        Path to a CSV file containing a 'remainder' column with a datetime index.

    Returns
    -------
    train_re : pd.DataFrame
        The dataframe read from the CSV file.
    lags : list
        The AR lags selected by `ar_select_order`.
    """
    train_re = pd.read_csv(csv_name, index_col=0, parse_dates=True)
    plot_pacf(train_re['remainder'])
    mod = ar_select_order(endog=train_re['remainder'], maxlag=10, old_names=False)
    lags = mod.ar_lags
    return train_re, lags
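
# Minimal usage sketch: write a toy CSV with a 'remainder' column first (the
# file name and data are made up for illustration).
import numpy as np
import pandas as pd

idx = pd.date_range("2021-01-01", periods=100, freq="D")
toy = pd.DataFrame({"remainder": np.random.standard_normal(100)}, index=idx)
toy.to_csv("toy_remainder.csv")
train_re, lags = rem_lags("toy_remainder.csv")
print(lags)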
def fit_AR_p():
    N, t, p, max_order = 200, 180, 1, 10
    realisations = pd.Series(list(sample_random_walk(0, N)), range(N))
    sel = ar_select_order(realisations[0:t], max_order)
    res = sel.model.fit()
    print(res.summary())
    print("Std residuals: " + str(statistics.stdev(res.resid)))

    f = plt.figure(1)
    res.plot_diagnostics(fig=f, lags=30)
    plt.tight_layout()
    plt.savefig('/Users/gwren/Downloads/43_ar_1_fit_diagnostics.svg', format='svg')
    f.show()

    f = plt.figure(1)
    res.plot_predict(start=t, end=N)
    plt.plot(realisations[0:N], label="realisations")
    plt.legend(loc="upper left")
    plt.grid(True)
    plt.xlabel('Period ($t$)')
    plt.savefig('/Users/gwren/Downloads/44_ar_1_fit_forecasts.svg', format='svg')
    f.show()
def train_gv_AR(params_gv, gv, max_lag, sel_crit):
    """
    Derive AR parameters of global variability under the assumption that gv
    does not depend on the scenario.

    Parameters
    ----------
    params_gv : dict
        Parameter dictionary containing keys which do not depend on the applied
        method:

        - ["targ"] (variable, i.e., tas or tblend, str)
        - ["esm"] (Earth System Model, str)
        - ["method"] (applied method, i.e., AR, str)
        - ["scenarios"] (emission scenarios used for training, list of strs)
    gv : dict
        Nested global mean temperature variability (volcanic influence removed)
        dictionary with keys:

        - [scen] (2d array (nr_runs, nr_ts) of globally averaged temperature
          variability time series)
    max_lag : int
        Maximum number of lags considered during fitting.
    sel_crit : str
        Selection criterion for the AR process order, e.g., 'bic' or 'aic'.

    Returns
    -------
    params : dict
        Parameter dictionary containing the original keys plus:

        - ["max_lag"] (maximum lag considered when finding a suitable AR model, int)
        - ["sel_crit"] (selection criterion applied to find a suitable AR model, str)
        - ["AR_int"] (intercept of the AR model, float)
        - ["AR_coefs"] (coefficients of the AR model for the lags contained in
          the selected AR model, list of floats)
        - ["AR_order_sel"] (selected AR order, int)
        - ["AR_std_innovs"] (standard deviation of the innovations of the
          selected AR model, float)

    Notes
    -----
    Assumptions:

    - The number of runs per scenario and the number of time steps in each
      scenario can vary.
    - Each scenario receives equal weight during training.
    """
    params_gv["max_lag"] = max_lag
    params_gv["sel_crit"] = sel_crit

    # Select the AR order
    nr_scens = len(gv.keys())
    AR_order_scens_tmp = np.zeros(nr_scens)

    for scen_idx, scen in enumerate(gv.keys()):
        nr_runs = gv[scen].shape[0]
        AR_order_runs_tmp = np.zeros(nr_runs)

        for run in np.arange(nr_runs):
            run_ar_lags = ar_select_order(
                gv[scen][run], maxlag=max_lag, ic=sel_crit, old_names=False
            ).ar_lags
            # If an order > 0 is selected, add the selected order to the vector
            if len(run_ar_lags) > 0:
                AR_order_runs_tmp[run] = run_ar_lags[-1]

        # Interpolation is not a good way to go here because it could lead to
        # an AR order that wasn't chosen by any run -> avoid it by taking the
        # nearest order (note: NumPy >= 1.22 renames `interpolation` to `method`)
        AR_order_scens_tmp[scen_idx] = np.percentile(
            AR_order_runs_tmp, q=50, interpolation="nearest"
        )

    AR_order_sel = int(
        np.percentile(AR_order_scens_tmp, q=50, interpolation="nearest")
    )

    # Determine the AR params for the selected AR order
    params_gv["AR_int"] = 0
    params_gv["AR_coefs"] = np.zeros(AR_order_sel)
    params_gv["AR_order_sel"] = AR_order_sel
    params_gv["AR_std_innovs"] = 0

    for scen_idx, scen in enumerate(gv.keys()):
        nr_runs = gv[scen].shape[0]
        AR_int_tmp = 0
        AR_coefs_tmp = np.zeros(AR_order_sel)
        AR_std_innovs_tmp = 0

        for run in np.arange(nr_runs):
            AR_model_tmp = AutoReg(
                gv[scen][run], lags=AR_order_sel, old_names=False
            ).fit()
            AR_int_tmp += AR_model_tmp.params[0] / nr_runs
            AR_coefs_tmp += AR_model_tmp.params[1:] / nr_runs
            AR_std_innovs_tmp += np.sqrt(AR_model_tmp.sigma2) / nr_runs

        params_gv["AR_int"] += AR_int_tmp / nr_scens
        params_gv["AR_coefs"] += AR_coefs_tmp / nr_scens
        params_gv["AR_std_innovs"] += AR_std_innovs_tmp / nr_scens

    # Check if the fitted AR process is stationary
    # (highly unlikely this test will ever fail, but better safe than sorry)
    ar = np.r_[1, -params_gv["AR_coefs"]]  # add zero-lag and negate
    ma = np.r_[1]  # add zero-lag
    arma_process = sm.tsa.ArmaProcess(ar, ma)
    if not arma_process.isstationary:
        raise ValueError(
            "The fitted AR process is not stationary. Another solution is needed."
        )

    return params_gv
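
# Minimal usage sketch for train_gv_AR (scenario names, run counts, and the
# AR(1) toy data are made up for illustration).
import numpy as np


def _toy_runs(nr_runs, nr_ts, phi=0.5):
    # Synthetic AR(1) runs so that a nonzero order is likely selected
    out = np.zeros((nr_runs, nr_ts))
    for r in range(nr_runs):
        e = np.random.standard_normal(nr_ts)
        for t in range(1, nr_ts):
            out[r, t] = phi * out[r, t - 1] + e[t]
    return out


np.random.seed(3)
gv = {"ssp585": _toy_runs(3, 150), "ssp126": _toy_runs(2, 150)}
params_gv = {"targ": "tas", "esm": "toy", "method": "AR", "scenarios": list(gv)}
params = train_gv_AR(params_gv, gv, max_lag=12, sel_crit="bic")
print(params["AR_order_sel"], params["AR_coefs"])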
# Right now an annual date series must be datetimes at the end of the year.
from datetime import datetime

dates = pd.date_range("1700-1-1", periods=len(data.endog), freq="A-DEC")

# ## Using Pandas
#
# Make a pandas TimeSeries or DataFrame
data.endog.index = dates
endog = data.endog
endog

# Instantiate the model
selection_res = ar_select_order(endog, 9, old_names=False, seasonal=True, period=11)
pandas_ar_res = selection_res.model.fit()

# Out-of-sample prediction
pred = pandas_ar_res.predict(start="2005", end="2027")
print(pred)

fig = pandas_ar_res.plot_predict(start="2005", end="2027")
from statsmodels.tsa.ar_model import AutoReg, ar_select_order
from statsmodels.tsa.api import acf, pacf, graphics

pd.plotting.register_matplotlib_converters()
# Default figure size
sns.mpl.rc('figure', figsize=(16, 6))

# %%
ts = stats_df[['created_at', 'percentage_correct']].set_index('created_at').dropna()
temp = ts.asfreq('1H', method='pad')  # hourly frequency, forward-filled

fig, ax = plt.subplots()
ax = ts.plot(ax=ax)
plt.show()

# %%
mod = AutoReg(ts, 3, old_names=False)
res = mod.fit()
print(res.summary())

# %%
res = mod.fit(cov_type="HC0")
print(res.summary())

# %%
sel = ar_select_order(ts, 13, old_names=False)
sel.ar_lags
res = sel.model.fit()
print(res.summary())

# %%
fig = plt.figure(figsize=(16, 9))
fig = res.plot_diagnostics(fig=fig, lags=30)
plt.show()
def test_ar_select_order_smoke():
    data = sunspots.load(as_pandas=True).data["SUNACTIVITY"]
    ar_select_order(data, 4, glob=True, trend="n")
    ar_select_order(data, 4, glob=False, trend="n")
    ar_select_order(data, 4, seasonal=True, period=12)
    ar_select_order(data, 4, seasonal=False)
    ar_select_order(data, 4, glob=True)
    ar_select_order(data, 4, glob=True, seasonal=True, period=12)
lh, rh, p = m.getPanda(twitterColumns, pollColumns)
h_agg, p_agg, p_var = m.aggregate(lh, rh, p, splitPolls=False, interpolate=True)
_, p_onl, p_tel = m.aggregate(lh, rh, p, splitPolls=True, interpolate=True)
kalmanData = m.getKalmanData(p_agg, h_agg)

all_data = kalmanData['remain_perc'].iloc[startTrain:]
remain_data = all_data.values
dates_train = all_data.index

# Run the experiment for different lag lengths
# (note: `_aic` is a private attribute; the public `aic` property exposes the same values)
print("order to use", ar_select_order(remain_data, maxlag=13)._aic)
n_lag = 7
runs = 100
k = np.arange(4, 5)
res = np.zeros(shape=(len(k), n_lag - 1))
sorted = np.zeros(shape=(len(k), n_lag - 1))
for i, j in enumerate(k):
    res[i] = experiment_lags(remain_data, n_lag, runs, j)
print("sum of results: " + str(np.sum(res, axis=0)))
optimal_lag = np.argmin(np.sum(res, axis=0)) + 1
print("optimal lag = " + str(optimal_lag))
optimal_lag = 1
runs = 3
# We can start with an AR(3). While this is not a good model for this data,
# it demonstrates the basic use of the API.
mod = AutoReg(housing, 3, old_names=False)
res = mod.fit()
print(res.summary())

# `AutoReg` supports the same covariance estimators as `OLS`. Below, we use
# `cov_type="HC0"`, which is White's covariance estimator. While the parameter
# estimates are the same, all of the quantities that depend on the standard
# error change.
res = mod.fit(cov_type="HC0")
print(res.summary())

sel = ar_select_order(housing, 13, old_names=False)
sel.ar_lags
res = sel.model.fit()
print(res.summary())

# `plot_predict` visualizes forecasts. Here we produce a large number of
# forecasts which show the strong seasonality captured by the model.
fig = res.plot_predict(720, 840)

# `plot_diagnostics` indicates that the model captures the key features in
# the data.
fig = plt.figure(figsize=(16, 9))
fig = res.plot_diagnostics(fig=fig, lags=30)
def AutoRegression(df_input, target_column, time_column,
                   epochs_to_forecast=1, epochs_to_test=1, hyper_params_ar=None):
    """
    This function selects an AR lag order, fits an AutoReg model, and produces
    forecasts with confidence intervals.

    Parameters:
    - df_input (pandas.DataFrame): Input Time Series.
    - target_column (str): name of the column containing the target feature
    - time_column (str): name of the column containing the pandas Timestamps
    - epochs_to_forecast (int): number of steps for predicting future data
    - epochs_to_test (int): number of steps corresponding to most recent records to test on
    - hyper_params_ar (dict): parameters of the AR model

    Returns:
    - df_output (pandas.DataFrame): Output DataFrame with forecast
    """
    # Copy the hyperparameters so the caller's dict is not mutated
    # (the original version used a mutable default argument)
    hyper_params_ar = dict(hyper_params_ar or {})

    # Hold out the forecast and test epochs, index by time
    input_series = df_input[:-(epochs_to_forecast + epochs_to_test)] \
        .set_index(time_column)[target_column]

    # Select the lag order, then drop the selection-only arguments before
    # passing the remaining hyperparameters to AutoReg
    model = ar_select_order(input_series, **hyper_params_ar)
    for hyp_param in ["maxlag", "glob", "ic"]:
        if hyp_param in hyper_params_ar.keys():
            del hyper_params_ar[hyp_param]
    model = AutoReg(input_series, lags=model.ar_lags, **hyper_params_ar)
    res = model.fit()
    print(res.summary())

    # start_idx = df_input[:-(epochs_to_forecast+epochs_to_test)][time_column].max()
    start_idx = df_input[-(epochs_to_forecast + epochs_to_test):][time_column].min()
    end_idx = df_input[-(epochs_to_forecast + epochs_to_test):][time_column].max()

    # =========================================================================
    # ### for statsmodels < 0.12.0
    # # forecast_steps = model.predict(res.params, start=start_idx, end=end_idx, dynamic=True)
    # forecast = df_input[target_column] * np.nan
    # forecast[-(epochs_to_forecast+epochs_to_test):] = forecast_steps
    # df_output = df_input.copy()
    # df_output["forecast"] = forecast
    # df_output["forecast_up"] = forecast * 1.1
    # df_output["forecast_low"] = forecast * 0.9
    # =========================================================================

    ### for statsmodels >= 0.12.0
    forecast_steps = res.get_prediction(start=start_idx, end=end_idx)
    forecast_steps_mean = forecast_steps.predicted_mean
    forecast_steps_low = forecast_steps.conf_int()["lower"]
    forecast_steps_up = forecast_steps.conf_int()["upper"]
    forecast = df_input[target_column] * np.nan
    forecast_low = df_input[target_column] * np.nan
    forecast_up = df_input[target_column] * np.nan
    forecast[-(epochs_to_forecast + epochs_to_test):] = forecast_steps_mean
    forecast_low[-(epochs_to_forecast + epochs_to_test):] = forecast_steps_low
    forecast_up[-(epochs_to_forecast + epochs_to_test):] = forecast_steps_up
    df_output = df_input.copy()
    df_output["forecast"] = forecast
    df_output["forecast_low"] = forecast_low
    df_output["forecast_up"] = forecast_up
    return df_output
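
# Minimal usage sketch (synthetic daily series; column names are illustrative).
import numpy as np
import pandas as pd

idx = pd.date_range("2020-01-01", periods=200, freq="D")
df = pd.DataFrame({"ds": idx,
                   "y": np.sin(np.arange(200) / 7) + 0.1 * np.random.standard_normal(200)})
out = AutoRegression(df, target_column="y", time_column="ds",
                     epochs_to_forecast=7, epochs_to_test=14,
                     hyper_params_ar={"maxlag": 10})
print(out.tail(10))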
import matplotlib.pyplot as plt
import pandas as pd
import pandas_datareader as pdr
import seaborn as sns
from statsmodels.tsa.ar_model import AutoReg, ar_select_order
from statsmodels.tsa.api import acf, pacf, graphics
import numpy as np

data = pdr.get_data_fred('INDPRO', '1959-01-01', '2019-06-01')
ind_prod = data.INDPRO.pct_change(12).dropna().asfreq('MS')

_, ax = plt.subplots(figsize=(16, 9))
ind_prod.plot(ax=ax)

sel = ar_select_order(ind_prod, 13, 'bic', old_names=False)
res = sel.model.fit()
print(res.summary())

sel = ar_select_order(ind_prod, 13, 'bic', glob=True, old_names=False)
sel.ar_lags
res_glob = sel.model.fit()
print(res_glob.summary())

ind_prod.shape

fig = res_glob.plot_predict(start=714, end=732)

res_ar5 = AutoReg(ind_prod, 5, old_names=False).fit()
predictions = pd.DataFrame({
    "AR(5)": res_ar5.predict(start=714, end=726),
    "AR(13)": res.predict(start=714, end=726),
})
### Plot sea level ###
plt.plot(data["year"], data["GMSLNA"])
plt.show()

x = data["GMSLNA"]

### ACF and PACF ###
graphics.plot_acf(x)
graphics.plot_pacf(x)
plt.show()

### AR ###
sel = ar_select_order(x, 13, old_names=False, seasonal=True, period=37)
sel.ar_lags
res = sel.model.fit()
fig = res.plot_predict(1000, 1100)
ax = x.plot(ax=fig.gca())  # overlay the observed series on the forecast plot
plt.show()

### ARIMA RANDOM WALK ###
mod1 = ARIMA(x, seasonal_order=(0, 1, 0, 37))
res1 = mod1.fit()
predict = res1.get_forecast(100)
predictions = predict.predicted_mean[-100:]
from typing import Any

import numpy as np


def train(
    data: np.ndarray,
    used_model: str = "autoreg",
    p: int = 5,
    d: int = 1,
    q: int = 0,
    cov_type="nonrobust",
    method="cmle",
    trend="nc",
    solver="lbfgs",
    maxlag=13,
    # SARIMAX args
    seasonal=(0, 0, 0, 0),
) -> Any:
    """Autoregressive model from the statsmodels library. Univariate data only.

    Args:
        data (np.ndarray): Time series data.
        used_model (str, optional): Used model. Defaults to "autoreg".
        p (int, optional): Order of the ARIMA model (autoregressive part). Check the statsmodels docs for more. Defaults to 5.
        d (int, optional): Order of the ARIMA model (integration part). Defaults to 1.
        q (int, optional): Order of the ARIMA model (moving-average part). Defaults to 0.
        cov_type: Parameter of the model call or fit function of the particular model. Check the statsmodels docs for more. Defaults to 'nonrobust'.
        method: Parameter of the model call or fit function of the particular model. Defaults to 'cmle'.
        trend: Parameter of the model call or fit function of the particular model. Defaults to 'nc'.
        solver: Parameter of the model call or fit function of the particular model. Defaults to 'lbfgs'.
        maxlag: Maximum lag considered during order selection. Defaults to 13.
        seasonal: Seasonal order for SARIMAX. Defaults to (0, 0, 0, 0).

    Returns:
        statsmodels.model: Trained model.
    """
    import statsmodels.tsa.api as sm
    from statsmodels.tsa.statespace.sarimax import SARIMAX
    from statsmodels.tsa.arima.model import ARIMA
    from statsmodels.tsa import ar_model

    used_model = used_model.lower()

    if used_model == "ar":
        # Legacy AR model (removed from statsmodels in newer releases)
        model = sm.AR(data)
        fitted_model = model.fit(method=method, trend=trend, solver=solver, disp=0)
    elif used_model == "arima":
        order = (p, d, q)
        model = ARIMA(data, order=order)
        fitted_model = model.fit()
    elif used_model == "sarimax":
        order = (p, d, q)
        model = SARIMAX(data, order=order, seasonal_order=seasonal)
        # `trend` and `solver` are not fit() arguments for SARIMAX and would
        # raise a TypeError, so only `disp` is passed here
        fitted_model = model.fit(disp=0)
    elif used_model == "autoreg":
        # Select the lag order, then refit with the selected configuration
        auto = ar_model.ar_select_order(data, maxlag=maxlag)
        model = ar_model.AutoReg(
            data,
            lags=auto.ar_lags,
            trend=auto.trend,
            seasonal=auto.seasonal,
            period=auto.period,
        )
        fitted_model = model.fit(cov_type=cov_type)
    else:
        raise ValueError(
            f"Used model has to be one of ['ar', 'arima', 'sarimax', 'autoreg']. You configured: {used_model}"
        )

    setattr(fitted_model, "my_name", used_model)
    setattr(fitted_model, "data_length", len(data))

    return fitted_model
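
# Minimal usage sketch (synthetic series; only the 'autoreg' branch is
# exercised here).
np.random.seed(4)
series = 10 + 0.1 * np.cumsum(np.random.standard_normal(300))
fitted = train(series, used_model="autoreg", maxlag=8)
print(fitted.my_name, fitted.data_length)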
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import statsmodels.api as sm
import statsmodels.tsa.stattools as stattools
from statsmodels.tsa.ar_model import AutoReg, ar_select_order
from arch.univariate import ARX, GARCH
from arch import arch_model

data = pd.read_excel('hw6/NYSEReturns.38135430_第三章.xlsx')
rates = data['RATE'].dropna().to_numpy()

# Descriptive statistics
plt.plot(rates)
sm.graphics.tsa.plot_acf(rates)

# ARCH effect
ar_res = ar_select_order(rates, 5).model.fit()
# Tests of no serial correlation and homoskedasticity
print(ar_res.diagnostic_summary())
print(ar_res.summary())
plt.figure()
plt.plot(ar_res.resid)
# a = ar_res.resid
# a_res = ar_select_order(a, 5).model.fit()
# print(a_res.diagnostic_summary())

# Fit with GARCH(p, q)
ar = ARX(rates, lags=[1, 2])  # Mean model
ar.volatility = GARCH(p=1, q=1)  # Volatility model
res = ar.fit()
res.plot()
fig, axes = plt.subplots(1, 2, clear=True, figsize=(10, 5))
# df.plot(ax=axes[0], title="$\Delta$ log(GDPC1)")
plot_acf(df.values.squeeze(), lags=20, ax=axes[0])
plot_pacf(df, lags=20, ax=axes[1])
plt.tight_layout(pad=2)
plt.savefig(os.path.join(imgdir, 'acf.jpg'))
plt.show()

# Select AR lag order with BIC
from statsmodels.tsa.ar_model import AutoReg, ar_select_order

s = 'GDPC1'  # real GDP, seasonally adjusted
df = alf(s, log=1, diff=1, start=19591201, freq='Q').loc[:20191231].dropna()
df.index = pd.DatetimeIndex(df.index.astype(str), freq='infer')
df_train = df[df.index <= '2017-12-31']
df_test = df[df.index > '2017-12-31']
lags = ar_select_order(df_train, maxlag=13, ic='bic', old_names=False).ar_lags
print('(BIC) lags= ', len(lags), ':', lags)

# AR and SARIMAX
# AR(p) is the simplest time-series model; it nests in SARIMAX(p,d,q,s), which
# adds a moving average MA(q), integration order I(d), seasonality S(s), and
# exogenous regressors X.
from statsmodels.tsa.statespace.sarimax import SARIMAX

adf = alf(s, log=1, freq='Q').loc[19591201:20171231]
adf.index = pd.DatetimeIndex(adf.index.astype(str), freq='infer')
arima = SARIMAX(adf, order=(2, 1, 0), trend='c').fit()
fig = arima.plot_diagnostics(figsize=(10, 6))
plt.tight_layout(pad=2)
plt.savefig(os.path.join(imgdir, 'ar.jpg'))
plt.show()
arima.summary()
import pandas as pd
import matplotlib.pyplot as plt
from math import sqrt, pi, acos
from statsmodels.tsa.ar_model import ar_select_order
from statsmodels.tsa.arima.model import ARIMA
from numpy.polynomial.polynomial import polyroots

# Load data
gnp = pd.read_csv('../data/dgnp82.txt', delimiter=r'\s+', header=None, names=['gnp'])
# Create a time-series object
gnp = pd.DataFrame({"gnp": gnp['gnp'].to_list()},
                   index=pd.date_range(start='1947-05', freq='Q', periods=len(gnp)))
# Plot
gnp.plot()
plt.show()

# Find the AR order
m1 = ar_select_order(gnp, maxlag=13, ic='aic')
print(f"AR order: {m1.ar_lags[-1]}")

m2 = ARIMA(gnp, order=(m1.ar_lags[-1], 0, 0))
res = m2.fit()

# Estimation
print(res.summary())

# "const" denotes the mean of the series, so the intercept of the AR equation
# is obtained below:
tmp = 1
for i in range(1, len(res.params) - 1):
    tmp -= res.params[i]
const = res.params[0] * tmp
print(f"const: {const}")

# Residual standard error
print(f"Residual standard error: {sqrt(res.params[-1])}")
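
# The imports of `polyroots`, `pi`, and `acos` above are otherwise unused,
# which suggests a characteristic-root computation; the following sketch (in
# the spirit of Tsay's classic dgnp82 example, an assumption on our part)
# finds the roots of the AR polynomial and the average business-cycle length
# implied by a complex root pair.
ar_coefs = res.params[1:-1]  # AR coefficients phi_1, ..., phi_p
# Roots of 1 - phi_1*z - ... - phi_p*z^p (coefficients in increasing degree)
roots = polyroots([1.0] + list(-ar_coefs))
print(f"Characteristic roots: {roots}")
for r in roots:
    if abs(r.imag) > 1e-8:
        # Average cycle length k = 2*pi / arccos(Re(r) / |r|)
        k = 2 * pi / acos(r.real / abs(r))
        print(f"Average business-cycle length: {k} quarters")
        break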
fig, ax = plt.subplots()
ax = housing.plot(ax=ax)

# We can start with an AR(3). While this is not a good model for this data,
# it demonstrates the basic use of the API.
mod = AutoReg(housing, 3, old_names=False)
res = mod.fit()
# print(res.summary())

# AutoReg supports the same covariance estimators as OLS. Below, we use
# cov_type="HC0", which is White's covariance estimator. While the parameter
# estimates are the same, all of the quantities that depend on the standard
# error change.
res = mod.fit(cov_type="HC0")
# print(res.summary())

sel = ar_select_order(housing, 13, old_names=False)
sel.ar_lags
res = sel.model.fit()
# print(res.summary())

# fig = res.plot_predict(720, 840)
# fig = plt.figure(figsize=(16, 9))
# fig = res.plot_diagnostics(fig=fig, lags=30)

sel = ar_select_order(housing, 13, seasonal=True, old_names=False)
sel.ar_lags
res = sel.model.fit()
print(res.summary())

yoy_housing = data.HOUSTNSA.pct_change(12).resample("MS").last().dropna()
_, ax = plt.subplots()