Example #1
# Imports assumed by this snippet: matplotlib, statsmodels, and the `arch`
# package for the unit-root tests.
import matplotlib.pyplot as plt
from statsmodels.graphics.tsaplots import plot_pacf
from statsmodels.tsa.ar_model import AutoReg, ar_select_order
from arch.unitroot import ADF, PhillipsPerron


def unitroot_test(series):
    # Basic statistics: plot the series and its PACF
    plt.figure()
    plt.plot(series)
    plot_pacf(series)

    # ADF test
    # AIC & BIC from lags 12 to 1
    print('$p$ & AIC & BIC \\\\')
    max_lags = 12
    for lags in range(max_lags, 0, -1):
        ar_model = AutoReg(series, lags, 'n')
        res = ar_model.fit()
        print(f'{lags} & {round(res.aic, 3)} & {round(res.bic, 3)} \\\\')

    # Best lags by `ar_select_order`
    sel = ar_select_order(series, max_lags, trend='n')
    lags = sel.ar_lags[-1]
    print(f'Lags selection: {sel.ar_lags}')

    # Start ADF test
    adf = ADF(series, lags)
    print(adf.summary())

    # PP test
    pp_tau = PhillipsPerron(series, 3, test_type='tau')  # q = 3
    pp_rho = PhillipsPerron(series, 3, test_type='rho')  # q = 3
    print(pp_tau.summary())
    print(pp_rho.summary())
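
A quick way to exercise this helper, sketched under the assumption that the imports above are in scope; the simulated random walk (seed and length are arbitrary) is the canonical unit-root process:

import numpy as np

rng = np.random.default_rng(0)
walk = np.cumsum(rng.standard_normal(500))  # random walk: ADF should not reject a unit root
unitroot_test(walk)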
Example #2
def sample(self, lagged_values, lagged_times=None, **ignored):
    """ Find unique values to see if outcomes are discrete or continuous """
    uniques = np.unique(lagged_values)
    # Arbitrary cutoff of 20%: few distinct values suggests quantized outcomes
    if len(uniques) < 0.2 * len(lagged_values):
        # Randomly select from the lagged values and return as the answer
        v = list(np.random.choice(lagged_values, self.num_predictions))
    else:
        # Our data are in reverse order; the AR model needs the opposite
        rev_values = lagged_values[::-1]
        """ Simple autoregression """
        ARmodel = ar_select_order(rev_values,
                                  maxlag=int(0.1 * len(rev_values)))
        model_fit = ARmodel.model.fit()
        # predict returns an array; take its single element as the point estimate
        point_est = model_fit.predict(start=len(rev_values),
                                      end=len(rev_values),
                                      dynamic=False)[0]
        st_dev = np.std(rev_values)
        # v = list(np.random.normal(point_est, st_dev, self.num_predictions))
        # Spread the predictions out evenly across +/- 2 standard deviations
        v = list(np.linspace(start=point_est - 2 * st_dev,
                             stop=point_est + 2 * st_dev,
                             num=self.num_predictions))
        print(*v, sep=", ")
    return v
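
The discrete-versus-continuous gate above is just a uniqueness-ratio heuristic; a standalone sketch of the same check (the helper name is made up, the 20% cutoff comes from the comment in the code):

import numpy as np

def looks_discrete(values, cutoff=0.2):
    # Few distinct values relative to the sample size suggests quantized outcomes
    return len(np.unique(values)) < cutoff * len(values)

print(looks_discrete([1, 2, 1, 2, 1, 2] * 2))           # True: 2 uniques in 12 values
print(looks_discrete(np.random.standard_normal(100)))   # False: all values distinct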
Example #3
def model(columna, n_periods):
    # Select the AR lag order, then fit an AutoReg and forecast n_periods ahead
    mod = ar_select_order(columna.ravel(), maxlag=15, old_names=True)
    AutoRegfit = AutoReg(columna, trend='c', lags=mod.ar_lags,
                         old_names=True).fit()
    prediccion = AutoRegfit.predict(start=len(columna),
                                    end=len(columna) + n_periods - 1,
                                    dynamic=False)
    return prediccion
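
A hypothetical call (the input series here is made up), assuming numpy, the statsmodels imports the function relies on, and a statsmodels version that still accepts old_names:

import numpy as np

serie = np.sin(np.linspace(0, 20, 200)) + np.random.normal(0, 0.1, 200)
forecast = model(serie, n_periods=10)  # ten steps past the end of the sample
print(forecast)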
Example #4
def get_geweke_diags(chains, split=0.3, skip=0.5):
    """Function computes geweke statistic for markov chains"""
    # Check dimensionality of chains
    # If a single chain is passed, add a chain dimension
    n_dims = len(chains.shape)
    if n_dims == 2:
        chains = np.expand_dims(chains, axis=0)

    # Compute split demarcations as integers to be used for indexing
    n_floor = int(chains.shape[1] * (split + skip))
    n_skip = int(chains.shape[1] * (skip))

    # Initialize the vector in which we store z-scores
    z_scores = np.zeros(chains.shape[0] * chains.shape[2])

    # Main loop that computes statistics of interest
    for i in range(chains.shape[0]):
        for j in range(chains.shape[2]):
            # Get Autoregression coefficients for each part of split of chain
            sel_1 = ar_select_order(chains[i, n_skip:n_floor, j],
                                    maxlag=10,
                                    seasonal=False)
            sel_2 = ar_select_order(chains[i, n_floor:, j],
                                    maxlag=10,
                                    seasonal=False)
            res_1 = sel_1.model.fit()
            res_2 = sel_2.model.fit()

            # Compute the autoregression-corrected variances for each split
            s_1 = res_1.sigma2 / np.square(1 - np.sum(res_1.params[1:]))
            s_2 = res_2.sigma2 / np.square(1 - np.sum(res_2.params[1:]))

            # Compute (absolute) z scores that form the basis of the test of whether or not to continue sampling
            z_scores[i * chains.shape[2] + j] = np.abs(
                (np.mean(chains[i, n_skip:n_floor, j]) -
                 np.mean(chains[i, n_floor:, j])) /
                np.sqrt((1 / (n_floor - n_skip)) * s_1 +
                        (1 / (chains.shape[1] - n_floor)) * s_2))

    # Continuation check: continue sampling (1) if any absolute z-score
    # exceeds 2, otherwise stop (0)
    continue_ = int(np.sum(z_scores > 2) > 0)
    return continue_, z_scores
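
A sketch of how this diagnostic might be driven, assuming chains arranged as (n_chains, n_samples, n_parameters), which is what the indexing above implies:

import numpy as np

rng = np.random.default_rng(42)
chains = rng.standard_normal((2, 1000, 3))  # 2 chains, 1000 draws, 3 parameters
keep_sampling, z = get_geweke_diags(chains)
print(keep_sampling, np.round(z, 2))  # 1 = keep sampling, 0 = stop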
Example #5
def ar_model(data):
    from statsmodels.tsa.ar_model import AutoReg, ar_select_order

    sel = ar_select_order(data["GMSLNA"],
                          20,
                          old_names=False,
                          seasonal=True,
                          period=12)
    res = sel.model.fit()

    return res
Example #6
def test_ar_order_select():
    # GH#2118
    np.random.seed(12345)
    y = arma_generate_sample([1, -0.75, 0.3], [1], 100)
    ts = Series(y, index=date_range(start="1/1/1990", periods=100, freq="M"))
    res = ar_select_order(ts, maxlag=12, ic="aic")
    assert tuple(res.ar_lags) == (1, 2)
    assert isinstance(res.aic, dict)
    assert isinstance(res.bic, dict)
    assert isinstance(res.hqic, dict)
    assert isinstance(res.model, AutoReg)
    assert not res.seasonal
    assert res.trend == "c"
    assert res.period is None
Example #7
def test_ar_order_select():
    # GH#2118
    np.random.seed(12345)
    y = sm.tsa.arma_generate_sample([1, -.75, .3], [1], 100)
    ts = Series(y, index=date_range(start='1/1/1990', periods=100, freq='M'))
    res = ar_select_order(ts, maxlag=12, ic='aic', old_names=False)
    assert tuple(res.ar_lags) == (1, 2)
    assert isinstance(res.aic, dict)
    assert isinstance(res.bic, dict)
    assert isinstance(res.hqic, dict)
    assert isinstance(res.model, AutoReg)
    assert not res.seasonal
    assert res.trend == 'c'
    assert res.period is None
Example #8
File: fit_ar.py  Project: aspannaus/kanga
def fit_ar(x):
    n = len(x)

    ar_selection = ar_select_order(x,
                                   min(int(np.floor(n)),
                                       int(np.floor(10 * np.log10(n)))),
                                   ic='aic',
                                   trend='n',
                                   seasonal=False)

    order = ar_selection.ar_lags[-1]

    model_fit = ar_selection.model.fit()

    return model_fit.params, model_fit.sigma2, order
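
A minimal sketch on a simulated AR(1) series (the 0.6 coefficient is arbitrary), assuming numpy and ar_select_order are imported as the snippet requires:

import numpy as np

rng = np.random.default_rng(0)
x = np.zeros(500)
for t in range(1, 500):
    x[t] = 0.6 * x[t - 1] + rng.standard_normal()
coefs, sigma2, order = fit_ar(x)
print(order, np.round(coefs, 3), round(sigma2, 3))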
Example #9
def sample(self, lagged_values, lagged_times=None, **ignored):
    """ Spread percentile-spaced points around an AR point estimate """
    # Our data are in reverse order; the AR model needs the opposite
    rev_values = lagged_values[::-1]
    ARmodel = ar_select_order(rev_values,
                              maxlag=int(0.1 * len(rev_values)))
    model_fit = ARmodel.model.fit()
    point_est = model_fit.predict(start=len(rev_values),
                                  end=len(rev_values),
                                  dynamic=False)[0]
    st_dev = np.std(rev_values)
    ps = self.evenly_spaced_percentiles(self.num_predictions)
    vs = [point_est + st_dev * self.norminv(p) for p in ps]
    # Jiggle by a fraction of a mid-percentile gap (assumes num_predictions > 114)
    jiggle = 0.2 * abs(vs[114] - vs[113]) * np.random.rand()
    v_jiggled = [v + jiggle for v in vs]
    return v_jiggled
Example #10
def rem_lags(csv_name):
    '''
    Analyze the remainder series: load it from a csv file, plot its PACF,
    and select the AR lag order.
    Parameters
    ----------
    csv_name: str
        Path to a csv file with a datetime index and a 'remainder' column
    Returns
    ---------
    train_re: pd.DataFrame
        The dataframe with the remainder data
    lags: list
        The AR lags selected by `ar_select_order`
    '''
    train_re = pd.read_csv(csv_name, index_col=0, parse_dates=True)
    plot_pacf(train_re['remainder'])
    mod = ar_select_order(endog=train_re['remainder'], maxlag=10, old_names=False)
    lags = mod.ar_lags
    return train_re, lags
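
A hypothetical run, writing a toy csv first (the file name is made up) so the expected 'remainder' column and datetime index exist; assumes pandas, numpy, and the plot_pacf import used above:

import numpy as np
import pandas as pd

idx = pd.date_range("2021-01-01", periods=200, freq="D")
pd.DataFrame({"remainder": np.random.normal(size=200)},
             index=idx).to_csv("remainder.csv")  # hypothetical file
train_re, lags = rem_lags("remainder.csv")
print(lags)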
Example #11
def fit_AR_p():
    N, t, p, max_order = 200, 180, 1, 10
    realisations = pd.Series(list(sample_random_walk(0, N)), range(N))
    sel = ar_select_order(realisations[0:t], max_order)
    res = sel.model.fit()
    print(res.summary())
    print("Std residuals: " + str(statistics.stdev(res.resid)))

    f = plt.figure(1)
    res.plot_diagnostics(fig=f, lags=30)
    plt.tight_layout()
    plt.savefig('/Users/gwren/Downloads/43_ar_1_fit_diagnostics.svg',
                format='svg')
    f.show()

    f = plt.figure(1)
    res.plot_predict(start=t, end=N)
    plt.plot(realisations[0:N], label="realisations")
    plt.legend(loc="upper left")
    plt.grid(True)
    plt.xlabel('Period ($t$)')
    plt.savefig('/Users/gwren/Downloads/44_ar_1_fit_forecasts.svg',
                format='svg')
    f.show()
Example #12
def train_gv_AR(params_gv, gv, max_lag, sel_crit):
    """
    Derive AR parameters of global variability under the assumption that gv does not
    depend on the scenario.

    Parameters
    ----------
    params_gv : dict
        parameter dictionary containing keys which do not depend on applied method

        - ["targ"] (variable, i.e., tas or tblend, str)
        - ["esm"] (Earth System Model, str)
        - ["method"] (applied method, i.e., AR, str)
        - ["scenarios"] (emission scenarios used for training, list of strs)
    gv : dict
        nested global mean temperature variability (volcanic influence removed)
        dictionary with keys

        - [scen] (2d array (nr_runs, nr_ts) of globally-averaged temperature variability
          time series)
    max_lag: int
        maximum number of lags considered during fitting
    sel_crit: str
        selection criterion for the AR process order, e.g., 'bic' or 'aic'

    Returns
    -------
    params : dict
        parameter dictionary containing original keys plus

        - ["max_lag"] (maximum lag considered when finding suitable AR model, hardcoded
          to 15 here, int)
        - ["sel_crit"] (selection criterion applied to find suitable AR model, hardcoded
          to Bayesian Information Criterion bic here, str)
        - ["AR_int"] (intercept of the AR model, float)
        - ["AR_coefs"] (coefficients of the AR model for the lags which are contained in
          the selected AR model, list of floats)
        - ["AR_order_sel"] (selected AR order, int)
        - ["AR_std_innovs"] (standard deviation of the innovations of the selected AR
          model, float)

    Notes
    -----
    - Assumptions
        - number of runs per scenario and the number of time steps in each
          scenario can vary
        - each scenario receives equal weight during training

    """

    params_gv["max_lag"] = max_lag
    params_gv["sel_crit"] = sel_crit

    # select the AR Order
    nr_scens = len(gv.keys())
    AR_order_scens_tmp = np.zeros(nr_scens)

    for scen_idx, scen in enumerate(gv.keys()):
        nr_runs = gv[scen].shape[0]
        AR_order_runs_tmp = np.zeros(nr_runs)

        for run in np.arange(nr_runs):
            run_ar_lags = ar_select_order(gv[scen][run],
                                          maxlag=max_lag,
                                          ic=sel_crit,
                                          old_names=False).ar_lags
            # if an order > 0 is selected, add the selected order to the vector
            if len(run_ar_lags) > 0:
                AR_order_runs_tmp[run] = run_ar_lags[-1]

        AR_order_scens_tmp[scen_idx] = np.percentile(AR_order_runs_tmp,
                                                     q=50,
                                                     interpolation="nearest")
        # interpolating percentiles could yield an AR order that no run actually
        # chose -> take the nearest chosen order instead

    AR_order_sel = int(
        np.percentile(AR_order_scens_tmp, q=50, interpolation="nearest"))

    # determine the AR params for the selected AR order
    params_gv["AR_int"] = 0
    params_gv["AR_coefs"] = np.zeros(AR_order_sel)
    params_gv["AR_order_sel"] = AR_order_sel
    params_gv["AR_std_innovs"] = 0

    for scen_idx, scen in enumerate(gv.keys()):
        nr_runs = gv[scen].shape[0]
        AR_order_runs_tmp = np.zeros(nr_runs)
        AR_int_tmp = 0
        AR_coefs_tmp = np.zeros(AR_order_sel)
        AR_std_innovs_tmp = 0

        for run in np.arange(nr_runs):
            AR_model_tmp = AutoReg(gv[scen][run],
                                   lags=AR_order_sel,
                                   old_names=False).fit()
            AR_int_tmp += AR_model_tmp.params[0] / nr_runs
            AR_coefs_tmp += AR_model_tmp.params[1:] / nr_runs
            AR_std_innovs_tmp += np.sqrt(AR_model_tmp.sigma2) / nr_runs

        params_gv["AR_int"] += AR_int_tmp / nr_scens
        params_gv["AR_coefs"] += AR_coefs_tmp / nr_scens
        params_gv["AR_std_innovs"] += AR_std_innovs_tmp / nr_scens

    # check if fitted AR process is stationary
    # (highly unlikely this test will ever fail but better safe than sorry)
    ar = np.r_[1, -params_gv["AR_coefs"]]  # add zero-lag and negate
    ma = np.r_[1]  # add zero-lag
    arma_process = sm.tsa.ArmaProcess(ar, ma)
    if not arma_process.isstationary:
        raise ValueError(
            "The fitted AR process is not stationary. Another solution is needed."
        )

    return params_gv
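
The nested gv structure described in the docstring can be mocked directly; a sketch with two toy scenarios (scenario names, run counts, and lengths are illustrative), assuming the module-level imports (numpy, AutoReg, ar_select_order, statsmodels.api as sm) and a numpy version that still accepts the deprecated interpolation keyword:

import numpy as np

rng = np.random.default_rng(0)

def make_runs(nr_runs, nr_ts, phi=0.5):
    # AR(1)-like variability per run
    runs = rng.standard_normal((nr_runs, nr_ts))
    for t in range(1, nr_ts):
        runs[:, t] += phi * runs[:, t - 1]
    return runs

gv = {"ssp126": make_runs(3, 250), "ssp585": make_runs(4, 250)}
params = train_gv_AR({}, gv, max_lag=12, sel_crit="bic")
print(params["AR_order_sel"], params["AR_coefs"], params["AR_std_innovs"])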
Example #13
# Right now an annual date series must be datetimes at the end of the
# year.

from datetime import datetime

dates = pd.date_range("1700-1-1", periods=len(data.endog), freq="A-DEC")

# ## Using Pandas
#
# Make a pandas TimeSeries or DataFrame

data.endog.index = dates
endog = data.endog
endog

# Instantiate the model

selection_res = ar_select_order(endog,
                                9,
                                old_names=False,
                                seasonal=True,
                                period=11)
pandas_ar_res = selection_res.model.fit()

# Out-of-sample prediction

pred = pandas_ar_res.predict(start="2005", end="2027")
print(pred)

fig = pandas_ar_res.plot_predict(start="2005", end="2027")
Example #14
from statsmodels.tsa.ar_model import AutoReg, ar_select_order
from statsmodels.tsa.api import acf, pacf, graphics

pd.plotting.register_matplotlib_converters()
# Default figure size
sns.mpl.rc('figure', figsize=(16, 6))
# %%
ts = stats_df[['created_at',
               'percentage_correct']].set_index('created_at').dropna()
temp = ts.asfreq('1H', method='pad')
# Plot the series
fig, ax = plt.subplots()
ax = ts.plot(ax=ax)
plt.show()
# %%
mod = AutoReg(ts, 3, old_names=False)
res = mod.fit()
print(res.summary())
# %%
res = mod.fit(cov_type="HC0")
print(res.summary())
# %%
sel = ar_select_order(ts, 13, old_names=False)
sel.ar_lags
res = sel.model.fit()
print(res.summary())
# %%
fig = plt.figure(figsize=(16, 9))
fig = res.plot_diagnostics(fig=fig, lags=30)
plt.show()
Example #15
def test_ar_select_order_smoke():
    data = sunspots.load(as_pandas=True).data["SUNACTIVITY"]
    ar_select_order(data, 4, glob=True, trend="n")
    ar_select_order(data, 4, glob=False, trend="n")
    ar_select_order(data, 4, seasonal=True, period=12)
    ar_select_order(data, 4, seasonal=False)
    ar_select_order(data, 4, glob=True)
    ar_select_order(data, 4, glob=True, seasonal=True, period=12)
Example #16
    lh, rh, p = m.getPanda(twitterColumns, pollColumns)
    h_agg, p_agg, p_var = m.aggregate(lh,
                                      rh,
                                      p,
                                      splitPolls=False,
                                      interpolate=True)
    _, p_onl, p_tel = m.aggregate(lh, rh, p, splitPolls=True, interpolate=True)

    kalmanData = m.getKalmanData(p_agg, h_agg)

    all_data = kalmanData['remain_perc'].iloc[startTrain:]
    remain_data = all_data.values
    dates_train = all_data.index

    # run experiment for lags
    print("order to use", ar_select_order(remain_data, maxlag=13)._aic)
    n_lag = 7
    runs = 100
    k = np.arange(4, 5)
    res = np.zeros(shape=(len(k), n_lag - 1))
    sorted = np.zeros(shape=(len(k), n_lag - 1))

    for i, j in enumerate(k):
        res[i] = experiment_lags(remain_data, n_lag, runs, j)

    print("sum of results: " + str(np.sum(res, axis=0)))
    optimal_lag = np.argmin(np.sum(res, axis=0)) + 1
    print("optimal lag = " + str(optimal_lag))

    optimal_lag = 1
    runs = 3
Example #17
# We can start with an AR(3).  While this is not a good model for this
# data, it demonstrates the basic use of the API.

mod = AutoReg(housing, 3, old_names=False)
res = mod.fit()
print(res.summary())

# `AutoReg` supports the same covariance estimators as `OLS`.  Below, we
# use `cov_type="HC0"`, which is White's covariance estimator. While the
# parameter estimates are the same, all of the quantities that depend on the
# standard error change.

res = mod.fit(cov_type="HC0")
print(res.summary())

sel = ar_select_order(housing, 13, old_names=False)
sel.ar_lags
res = sel.model.fit()
print(res.summary())

# `plot_predict` visualizes forecasts.  Here we produce a large number of
# forecasts which show the strong seasonality captured by the model.

fig = res.plot_predict(720, 840)

# `plot_diagnostics` indicates that the model captures the key features in
# the data.

fig = plt.figure(figsize=(16, 9))
fig = res.plot_diagnostics(fig=fig, lags=30)
Example #18
def AutoRegression(df_input,
                   target_column,
                   time_column,
                   epochs_to_forecast=1,
                   epochs_to_test=1,
                   hyper_params_ar={}):
    """
    This function performs regression using feature augmentation and then training XGB with Crossvalidation.
    Parameters:
        - df_input (pandas.DataFrame): Input Time Series.
        - target_column (str): name of the column containing the target feature
        - time_column (str): name of the column containing the pandas Timestamps
        - frequency_data (str): string representing the time frequency of record, e.g. "h" (hours), "D" (days), "M" (months)
        - epochs_to_forecast (int): number of steps for predicting future data
        - epochs_to_test (int): number of steps corresponding to most recent records to test on
        - hyper_params_ar: Parameters of AR model
    Returns:
        - df_output (pandas.DataFrame): Output DataFrame with forecast
    """
    
    # create and evaluate an updated autoregressive model

    # load dataset, dropping the held-out forecast/test epochs
    input_series = df_input[:-(epochs_to_forecast+epochs_to_test)].set_index(time_column)[target_column]
    # select the AR lag order, then drop the selection-only kwargs
    model = ar_select_order(input_series, **hyper_params_ar)
    for hyp_param in ["maxlag","glob","ic"]:
        if hyp_param in hyper_params_ar.keys():
            del hyper_params_ar[hyp_param]
            
    model = AutoReg(input_series, lags=model.ar_lags, **hyper_params_ar)
    res = model.fit()
    print(res.summary())
    
    #start_idx = df_input[:-(epochs_to_forecast+epochs_to_test)][time_column].max()
    start_idx = df_input[-(epochs_to_forecast+epochs_to_test):][time_column].min()
    end_idx = df_input[-(epochs_to_forecast+epochs_to_test):][time_column].max()
    
# =============================================================================
#     ### for statsmodels< 0.12.0
#     #forecast_steps = model.predict(res.params, start=start_idx, end=end_idx, dynamic=True)
#     forecast = df_input[target_column] * np.nan
#     forecast[-(epochs_to_forecast+epochs_to_test):] = forecast_steps
#     df_output = df_input.copy()
#     df_output["forecast"] = forecast
#     df_output["forecast_up"] = forecast * 1.1
#     df_output["forecast_low"] = forecast * 0.9
# =============================================================================

    ### for statsmodels>= 0.12.0
    forecast_steps = res.get_prediction(start=start_idx, end=end_idx)
    forecast_steps_mean = forecast_steps.predicted_mean
    forecast_steps_low = forecast_steps.conf_int()["lower"]
    forecast_steps_up = forecast_steps.conf_int()["upper"]
    forecast = df_input[target_column] * np.nan    
    forecast_low = df_input[target_column] * np.nan
    forecast_up = df_input[target_column] * np.nan
    forecast[-(epochs_to_forecast+epochs_to_test):] = forecast_steps_mean
    forecast_low[-(epochs_to_forecast+epochs_to_test):] = forecast_steps_low
    forecast_up[-(epochs_to_forecast+epochs_to_test):] = forecast_steps_up
    df_output = df_input.copy()
    df_output["forecast"] = forecast
    df_output["forecast_low"] = forecast_low
    df_output["forecast_up"] = forecast_up
    
    return df_output
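
A hypothetical invocation (column names are made up), using an integer time column so statsmodels can forecast positionally without a date frequency; note that hyper_params_ar must carry "maxlag", since ar_select_order requires it:

import numpy as np
import pandas as pd

df = pd.DataFrame({"t": np.arange(120),
                   "y": np.sin(np.arange(120) / 7) + np.random.normal(0, 0.1, 120)})
out = AutoRegression(df, target_column="y", time_column="t",
                     epochs_to_forecast=5, epochs_to_test=10,
                     hyper_params_ar={"maxlag": 10})
print(out[["y", "forecast", "forecast_low", "forecast_up"]].tail(15))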
Example #19
import matplotlib.pyplot as plt
import pandas as pd
import pandas_datareader as pdr
import seaborn as sns
from statsmodels.tsa.ar_model import AutoReg, ar_select_order
from statsmodels.tsa.api import acf, pacf, graphics
import numpy as np

data = pdr.get_data_fred('INDPRO', '1959-01-01', '2019-06-01')
ind_prod = data.INDPRO.pct_change(12).dropna().asfreq('MS')
_, ax = plt.subplots(figsize=(16, 9))
ind_prod.plot(ax=ax)

sel = ar_select_order(ind_prod, 13, 'bic', old_names=False)
res = sel.model.fit()
print(res.summary())

sel = ar_select_order(ind_prod, 13, 'bic', glob=True, old_names=False)
sel.ar_lags
res_glob = sel.model.fit()
print(res_glob.summary())

ind_prod.shape

fig = res_glob.plot_predict(start=714, end=732)

res_ar5 = AutoReg(ind_prod, 5, old_names=False).fit()
predictions = pd.DataFrame({
    "AR(5)": res_ar5.predict(start=714, end=726),
    "AR(13)": res.predict(start=714, end=726),
})
Example #20

### Plot sealevel ###
plt.plot(data["year"],data["GMSLNA"])
plt.show()

x = data["GMSLNA"]

### ACF and PACF ###
graphics.plot_acf(x)
graphics.plot_pacf(x)
plt.show()


### AR  ###
sel = ar_select_order(x, 13, old_names=False, seasonal=True, period=37)
sel.ar_lags
res = sel.model.fit()
fig = res.plot_predict(1000, 1100)
ax = x.plot(ax=fig.gca())  # overlay the observed series on the forecast plot

plt.show()



### ARIMA RANDOM WALK ###
mod1 = ARIMA(x, seasonal_order=(0, 1, 0, 37))
res1 = mod1.fit()

predict = res1.get_forecast(100)
predictions = predict.predicted_mean[-100:]
Example #21
def train(
    data: np.ndarray,
    used_model: str = "autoreg",
    p: int = 5,
    d: int = 1,
    q: int = 0,
    cov_type="nonrobust",
    method="cmle",
    trend="nc",
    solver="lbfgs",
    maxlag=13,
    # SARIMAX args
    seasonal=(0, 0, 0, 0),
) -> Any:
    """Autoregressive model from statsmodels library. Only univariate data.

    Args:
        data (np.ndarray): Time series data.
        used_model (str, optional): Used model. Defaults to "autoreg".
        p (int, optional): Order of ARIMA model (1st - proportional). Check statsmodels docs for more. Defaults to 5.
        d (int, optional): Order of ARIMA model. Defaults to 1.
        q (int, optional): Order of ARIMA model. Defaults to 0.
        cov_type: Parameters of model call or fit function of particular model. Check statsmodels docs for more.
            Defaults to 'nonrobust'.
        method: Parameters of model call or fit function of particular model. Check statsmodels docs for more.
            Defaults to 'cmle'.
        trend: Parameters of model call or fit function of particular model. Check statsmodels docs for more.
            Defaults to 'nc'.
        solver: Parameters of model call or fit function of particular model. Check statsmodels docs for more.
            Defaults to 'lbfgs'.
        maxlag: Parameters of model call or fit function of particular model. Check statsmodels docs for more.
            Defaults to 13.
        seasonal: Parameters of model call or fit function of particular model. Check statsmodels docs for more.
            Defaults to (0, 0, 0, 0).

    Returns:
        statsmodels.model: Trained model.
    """

    import statsmodels.tsa.api as sm
    from statsmodels.tsa.statespace.sarimax import SARIMAX
    from statsmodels.tsa.arima.model import ARIMA
    from statsmodels.tsa import ar_model

    used_model = used_model.lower()

    if used_model == "ar":
        model = sm.AR(data)
        fitted_model = model.fit(method=method, trend=trend, solver=solver, disp=0)

    elif used_model == "arima":
        order = (p, d, q)
        model = ARIMA(data, order=order)
        fitted_model = model.fit()

    elif used_model == "sarimax":
        order = (p, d, q)
        model = SARIMAX(data, order=order, seasonal_order=seasonal)
        fitted_model = model.fit(method=method, trend=trend, solver=solver, disp=0)

    elif used_model == "autoreg":
        auto = ar_model.ar_select_order(data, maxlag=maxlag)
        model = ar_model.AutoReg(
            data,
            lags=auto.ar_lags,
            trend=auto.trend,
            seasonal=auto.seasonal,
            period=auto.period,
        )
        fitted_model = model.fit(cov_type=cov_type)

    else:
        raise ValueError(
            f"Used model has to be one of ['ar', 'arima', 'sarimax', 'autoreg']. You configured: {used_model}"
        )

    setattr(fitted_model, "my_name", used_model)
    setattr(fitted_model, "data_length", len(data))

    return fitted_model
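
A sketch of the "autoreg" branch (the one that uses ar_select_order), on simulated AR(1) data; the coefficient and lengths are arbitrary:

import numpy as np

rng = np.random.default_rng(1)
series = np.zeros(300)
for t in range(1, 300):
    series[t] = 0.7 * series[t - 1] + rng.standard_normal()
fitted = train(series, used_model="autoreg", maxlag=13)
print(fitted.my_name, fitted.data_length)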
Example #22
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import statsmodels.api as sm
import statsmodels.tsa.stattools as stattools
from statsmodels.tsa.ar_model import AutoReg, ar_select_order
from arch.univariate import ARX, GARCH
from arch import arch_model

data = pd.read_excel('hw6/NYSEReturns.38135430_第三章.xlsx')
rates = data['RATE'].dropna().to_numpy()

# Statistic description
plt.plot(rates)
sm.graphics.tsa.plot_acf(rates)

# ARCH effect
ar_res = ar_select_order(rates, 5).model.fit()
# Test of no serial correlation and homoskedasticity
print(ar_res.diagnostic_summary())
print(ar_res.summary())
plt.figure()
plt.plot(ar_res.resid)

# a = ar_res.resid
# a_res = ar_select_order(a, 5).model.fit()
# print(a_res.diagnostic_summary())

# Fit with GARCH(p, q)
ar = ARX(rates, lags=[1, 2])  # Mean model
ar.volatility = GARCH(p=1, q=1)  # Volatility model
res = ar.fit()
res.plot()
Example #23
def test_ar_select_order_smoke():
    data = sm.datasets.sunspots.load(as_pandas=True).data['SUNACTIVITY']
    ar_select_order(data, 4, glob=True, trend='n', old_names=False)
    ar_select_order(data, 4, glob=False, trend='n', old_names=False)
    ar_select_order(data, 4, seasonal=True, period=12, old_names=False)
    ar_select_order(data, 4, seasonal=False, old_names=False)
    ar_select_order(data, 4, glob=True, old_names=False)
    ar_select_order(data,
                    4,
                    glob=True,
                    seasonal=True,
                    period=12,
                    old_names=False)
Example #24
fig, axes = plt.subplots(1, 2, clear=True, figsize=(10, 5))
#df.plot(ax=axes[0], title="$\Delta$ log(GDPC1)")
plot_acf(df.values.squeeze(), lags=20, ax=axes[0])
plot_pacf(df, lags=20, ax=axes[1])
plt.tight_layout(pad=2)
plt.savefig(os.path.join(imgdir, 'acf.jpg'))
plt.show()

# Select AR lag order with BIC
from statsmodels.tsa.ar_model import AutoReg, ar_select_order
s = 'GDPC1'  # real gdp, seasonally adjusted
df = alf(s, log=1, diff=1, start=19591201, freq='Q').loc[:20191231].dropna()
df.index = pd.DatetimeIndex(df.index.astype(str), freq='infer')
df_train = df[df.index <= '2017-12-31']
df_test = df[df.index > '2017-12-31']
lags = ar_select_order(df_train, maxlag=13, ic='bic', old_names=False).ar_lags
print('(BIC) lags= ', len(lags), ':', lags)

# AR and SARIMAX
## AR(p) is simplest time-model, can nest in SARIMAX(p,d,q,s) with
## moving average MA(q), integration order I(d), seasonality S(s), exogenous X
from statsmodels.tsa.statespace.sarimax import SARIMAX
adf = alf(s, log=1, freq='Q').loc[19591201:20171231]
adf.index = pd.DatetimeIndex(adf.index.astype(str), freq='infer')
arima = SARIMAX(adf, order=(2, 1, 0), trend='c').fit()
fig = arima.plot_diagnostics(figsize=(10, 6))
plt.tight_layout(pad=2)
plt.savefig(os.path.join(imgdir, 'ar.jpg'))
plt.show()
arima.summary()
Example #25
import pandas as pd
import matplotlib.pyplot as plt
from math import sqrt, pi, acos
from statsmodels.tsa.ar_model import ar_select_order
from statsmodels.tsa.arima.model import ARIMA
from numpy.polynomial.polynomial import polyroots

# Load data
gnp = pd.read_csv('../data/dgnp82.txt', delimiter=r'\s+', header=None, names=['gnp'])
# create a time-series object
gnp = pd.DataFrame({"gnp": gnp['gnp'].to_list()}, index=pd.date_range(start='1947-05', freq='Q', periods=len(gnp)))
# plot
gnp.plot()
plt.show()
# Find the AR order
m1 = ar_select_order(gnp, maxlag=13, ic='aic')
print(f"AR order: {m1.ar_lags[-1]}")
m2 = ARIMA(gnp, order=(m1.ar_lags[-1], 0, 0))
res = m2.fit()
# Estimation
print(res.summary())

# 'const' denotes the mean of the series.
# Therefore, the constant term is obtained below:
tmp = 1
for i in range(1, len(res.params) - 1):
    tmp -= res.params[i]
const = res.params[0] * tmp
print(f"const: {const}")
# Residual standard error
print(f"Residual standard error: {sqrt(res.params[-1])}")
Example #26
fig, ax = plt.subplots()
ax = housing.plot(ax=ax)
'''
We can start with an AR(3). While this is not a good model for this data, it demonstrates the basic use of the API.
'''
mod = AutoReg(housing, 3, old_names=False)
res = mod.fit()
#print(res.summary())
'''
AutoReg supports the same covariance estimators as OLS. Below, we use cov_type="HC0", which is White’s covariance estimator. 
While the parameter estimates are the same, all of the quantities that depend on the standard error change.
'''
res = mod.fit(cov_type="HC0")
#print(res.summary())

sel = ar_select_order(housing, 13, old_names=False)
sel.ar_lags
res = sel.model.fit()
#print(res.summary())

#fig = res.plot_predict(720, 840)
# fig = plt.figure(figsize=(16,9))
# fig = res.plot_diagnostics(fig=fig, lags=30)

sel = ar_select_order(housing, 13, seasonal=True, old_names=False)
sel.ar_lags
res = sel.model.fit()
print(res.summary())

yoy_housing = data.HOUSTNSA.pct_change(12).resample("MS").last().dropna()
_, ax = plt.subplots()