Exemplo n.º 1
0
  curr = df[mask]
  pct_occupied = curr.PercentOccupied
  if len(pct_occupied) > 0 and not path.exists(filename):

    num_split = int(.7*len(pct_occupied))
    oob_length = int(0.2*len(pct_occupied))

    time_chunks_per_day = curr.groupby(curr.OccupancyDateTime.dt.dayofyear).count().SourceElementKey.max()


    tr, tt = pct_occupied.iloc[:num_split], pct_occupied.iloc[num_split:]

    mdl = auto_arima(tr, error_action='ignore', trace=True,
                         start_p=2, start_q=2, start_P=2, start_Q=2,
                         max_p=10, max_q=10, max_P=10, max_Q=10,
                         d=0, D=0, out_of_sample_size=oob_length, 
                         max_order=None, information_criterion='oob',
                          seasonal=True, m=time_chunks_per_day)

    with open(filename, 'wb') as pkl:
        pickle.dump(mdl, pkl)
  else:
    print('no vals or already ran')

  # preds, conf_int = mdl.predict(n_periods=tt.shape[0], return_conf_int=True)


  # print("Test RMSE: %.3f" % np.sqrt(mean_squared_error(tt, preds)))

# # #############################################################################
# # Plot the points and the forecasts
Exemplo n.º 2
0
def test_inf_max_order():
    """Smoke test: ``auto_arima`` must accept ``max_order=None`` without raising."""
    search_options = {
        'max_order': None,
        'suppress_warnings': True,
        'error_action': 'trace',
    }
    # Only successful completion matters; the fitted model is discarded.
    pm.auto_arima(lynx, **search_options)
Exemplo n.º 3
0
def test_valid_max_order_edges(endog, max_order, kwargs):
    """Check that the selected model's combined order stays within ``max_order``.

    The sum of the non-seasonal order terms plus the first three entries of
    the seasonal order of the fitted model must not exceed ``max_order``.
    """
    model = pm.auto_arima(endog, max_order=max_order, **kwargs)
    nonseasonal_total = sum(model.order)
    seasonal_total = sum(model.seasonal_order[:3])
    assert nonseasonal_total + seasonal_total <= max_order
Exemplo n.º 4
0
    def ARIMA(self, model_var='adj_close', n_periods=50, timescale='D'):
        """Fit an auto-ARIMA model to one price series, then plot and print its forecast.

        Shows autocorrelation plots for the series and its first and second
        differences, fits ``pmd.auto_arima``, and plots the observed series,
        the in-sample fit, the forecast and its confidence band.

        Args:
            model_var: Column to model. ``'close'`` reads from
                ``self.minute_prices`` and re-indexes the series with
                integers; any other value reads the column from
                ``GetTimeSlot(self.tstock_info, days=5 * 365)``.
            n_periods: Number of future periods to forecast.
            timescale: Frequency string passed to ``pd.date_range`` when
                building the forecast index (not used for ``'close'``).

        Returns:
            None. The results are only plotted and printed.
        """
        #timeseries = self.tstock_info[model_var]
        timeseries = None

        if model_var == 'close':
            timeseries = self.minute_prices[model_var]
            # NOTE(review): this assigns a new index on the object returned by
            # self.minute_prices[model_var]; confirm that changing it in place
            # is intended.
            timeseries.index = np.arange(0, len(timeseries))
        else:
            timeseries = GetTimeSlot(self.tstock_info, days=5 * 365)[model_var]
        # Three stacked ACF panels: original, first difference, second difference.
        # `fig` is rebound to whatever plot_acf returns after each call.
        fig = plt.figure(figsize=(10, 10))
        ax1 = fig.add_subplot(311)
        fig = plot_acf(timeseries,
                       ax=ax1,
                       title="Autocorrelation on Original Series")
        ax2 = fig.add_subplot(312)
        fig = plot_acf(timeseries.diff().dropna(),
                       ax=ax2,
                       title="1st Order Differencing")
        ax3 = fig.add_subplot(313)
        fig = plot_acf(timeseries.diff().diff().dropna(),
                       ax=ax3,
                       title="2nd Order Differencing")

        #model = ARIMA(timeseries, order=(1, 1, 1))
        #results = model.fit()
        #results.plot_predict(1, 210)
        # Automatic order search; test="adf" selects the ADF test for choosing d.
        autoarima_model = pmd.auto_arima(timeseries,
                                         start_p=1,
                                         start_q=1,
                                         test="adf",
                                         trace=True)
        #timeseries['ARIMA'] =
        # NOTE(review): `start` is forwarded to predict() as an extra keyword;
        # confirm the installed pmdarima version accepts it.
        fitted, confint = autoarima_model.predict(n_periods,
                                                  return_conf_int=True,
                                                  start=timeseries.index[-1])
        fittedv = autoarima_model.predict_in_sample()
        # NOTE(review): the forecast index begins AT the last observed index
        # value rather than one step after it; confirm this is intended.
        index_of_fc = pd.date_range(timeseries.index[-1],
                                    periods=n_periods,
                                    freq=timescale)
        if model_var == 'close':
            # The 'close' series has an integer index, so use an integer range.
            index_of_fc = np.arange(timeseries.index[-1],
                                    +timeseries.index[-1] + n_periods)
        # make series for plotting purpose
        # Display the ACF figure before the forecast plot is drawn.
        plt.show()
        fittedv_series = pd.Series(fittedv, index=timeseries.index)
        fitted_series = pd.Series(fitted, index=index_of_fc)
        # Difference between the in-sample fit and the observations.
        print(fittedv_series - timeseries)
        # Column 0 of confint is used as the lower bound, column 1 as the upper.
        lower_series = pd.Series(confint[:, 0], index=index_of_fc)
        upper_series = pd.Series(confint[:, 1], index=index_of_fc)
        print(lower_series)
        print(fitted_series)
        print(upper_series)
        # Plot
        plt.plot(timeseries)
        plt.plot(fitted_series, color='darkgreen')
        plt.plot(fittedv_series, color='yellow')
        plt.fill_between(lower_series.index,
                         lower_series,
                         upper_series,
                         color='k',
                         alpha=.15)

        plt.title(
            "SARIMA - Final Forecast of Stock prices - Time Series Dataset")
        plt.show()
Exemplo n.º 5
0
    plt.title('Wykres cen akcji firmy ' + sStock + '\nPrzedział czasu równy ' +
              sPeriod)

    plt.show()


# Plot the closing prices against their dates before modelling.
FigPlot(vStockDate, vStockClose)

# Stepwise search for a non-seasonal ARIMA model of the closing prices.
# The seasonal-related arguments (m, start_P, D) are passed alongside
# seasonal=False.
modArima = pmd.auto_arima(
    vStockClose,
    start_p=0,
    start_q=0,
    test='adf',  # use the ADF test to find the optimal 'd'
    max_p=5,
    max_q=5,  # maximum p and q
    m=1,  # frequency of series
    d=None,  # let the model determine 'd'
    seasonal=False,  # no seasonality
    start_P=0,
    D=0,
    trace=True,
    error_action='ignore',
    suppress_warnings=True,
    stepwise=True)

# Summary of the selected ARIMA model
modSummary = modArima.summary()
print(modSummary)

# Diagnostic plots to check the adequacy of the fitted model
modArima.plot_diagnostics(figsize=(8, 8))
plt.show()
Exemplo n.º 6
0
def _format_growth_info(growth, tickers):
    """Build the "Stock growth" text listing the growth (in %) of ``tickers``."""
    items = []
    for position, ticker in enumerate(tickers):
        pct = str(growth[ticker] * 100)
        if position == 0:
            items.append("Stock growth:\n" + " " + ticker + ": " + pct + "%\n")
        else:
            items.append(ticker + ":" + pct + "%\n")
    # Items are space-separated and newline-terminated, as print(*items) would emit.
    return " ".join(items) + "\n"


def _format_prediction_info(predictions, tickers, horizon):
    """Build the per-ticker "Day N: €value" forecast text for ``tickers``."""
    day_labels = ["Day " + str(day + 1) + ": €" for day in range(horizon)]
    items = []
    for position, ticker in enumerate(tickers):
        priced_days = [
            label + str(np.round(value, 2))
            for label, value in zip(day_labels, predictions[ticker])
        ]
        values = ", ".join(priced_days) + "." if priced_days else ""
        block = ticker + "\n" + values + "\n"
        items.append(block if position == 0 else "\n" + block)
    return " ".join(items) + "\n"


def _write_text(path, text):
    """Write ``text`` to ``path`` as UTF-8, replacing any previous content."""
    with open(path, 'w', encoding="utf-8") as f:
        f.write(text)


def prediction_generator(investor,
                         prediction_file='data/prediction.csv',
                         horizon=5):
    """
    Main function that will generate predictions getting data from IEXFINANCE.

    Downloads closing prices for the module-level ``ticker_list`` over the
    last ``duration`` days, fits one auto-ARIMA model per ticker, ranks the
    tickers by the growth between the last close and the first forecast, and
    builds a text report for the requested investor profile. The report is
    also written to ``data/growth_info.txt`` and ``data/predition_info.txt``.

    Args:
        investor: ``"risky"`` reports the three tickers with the highest
            growth; ``"non-risky"`` reports the remaining ones.
        prediction_file: Unused; kept for backward compatibility.
        horizon: Number of periods to forecast and to report per ticker.

    Returns:
        The growth text followed by the prediction text.

    Raises:
        ValueError: If ``investor`` is neither ``"risky"`` nor ``"non-risky"``.
    """
    # Fail early with a clear message; previously an unknown profile only
    # surfaced as an UnboundLocalError at the final return.
    if investor not in ("risky", "non-risky"):
        raise ValueError(
            "investor must be 'risky' or 'non-risky', got %r" % (investor,))

    end = datetime.now()
    start = end - timedelta(days=duration)
    # get data from IEXFINANCE, import data to a pandas dataframe
    # SECURITY NOTE: the API token is hard-coded in source; it should be
    # rotated and loaded from configuration or the environment instead.
    df = get_historical_data(ticker_list,
                             start,
                             end,
                             output_format='pandas',
                             token="sk_261e4411a4ef43fab3fea00a67631841")
    idx = pd.IndexSlice
    df = df.loc[:, idx[:, "close"]]
    df.columns = ticker_list

    # One model per ticker, fitted on the non-missing closes. The forecast
    # length follows `horizon` so it always matches the day labels.
    predictions = pd.DataFrame()
    for ticker in ticker_list:
        closes = df[ticker].values
        model = pm.auto_arima(closes[~np.isnan(closes)])
        predictions[ticker] = model.predict(n_periods=horizon)

    # Growth from the last observed close to the first forecast, relative to
    # the last close. (The ranking is the same as when dividing by the
    # forecast for positive prices; only the reported percentage differs.)
    last_close = df.iloc[-1]
    first_forecast = predictions.iloc[0]
    growth = ((first_forecast - last_close) /
              last_close).sort_values(ascending=False)

    # differentiate ticker lists for risky (top 3) and non-risky investors (rest)
    if investor == "risky":
        selected = growth.index[0:3]
    else:
        selected = growth.index[3:]
    predictions = predictions.loc[:, selected]

    # OUTPUT: build both texts, persist them, and return them to the caller
    growth_info = _format_growth_info(growth, selected)
    _write_text('data/growth_info.txt', growth_info)

    prediction_info = _format_prediction_info(predictions, selected, horizon)
    _write_text('data/predition_info.txt', prediction_info)

    return growth_info + prediction_info
Exemplo n.º 7
0
        dfCrime_sup2010['mois'] >= '2019-01-01']
    timeseries_19_20 = dfCrime_2019_2020.set_index('mois')
    timeseries_19_20.index = pd.to_datetime(timeseries_19_20.index,
                                            format='%Y_%m')

    crime_decomposed = seasonal_decompose(timeseries_10_19['total'],
                                          model='multiplicative')

    # Fit auto_arima function to dataset
    stepwise_fit = auto_arima(timeseries_10_19['total'],
                              start_p=1,
                              start_q=1,
                              max_p=3,
                              max_q=3,
                              m=12,
                              start_P=0,
                              seasonal=True,
                              d=None,
                              D=1,
                              trace=True,
                              error_action='ignore',
                              suppress_warnings=True,
                              stepwise=True)

    ### FIT TO DATASET

    # Split data into train / test sets
    train = timeseries_10_19.iloc[:len(timeseries_10_19) - 12]
    test = timeseries_10_19.iloc[len(timeseries_10_19) -
                                 12:]  # set one year(12 months) for testing

    # Fit a SARIMAX on the training set
Exemplo n.º 8
0
def arima(l_args, s_ticker, s_interval, df_stock):
    """Forecast a stock's adjusted close with ARIMA, then plot and print it.

    Parses ``l_args`` as command-line style options. With ``-o/--order`` an
    ARIMA model of exactly that (p, d, q) order is fitted; otherwise
    ``pmdarima.auto_arima`` selects the order, optionally with a seasonal
    period of 5 (``-s``) and using the information criterion given by ``-i``.

    Args:
        l_args: List of option strings (``-d``, ``-i``, ``-s``, ``-o``, ``-r``).
        s_ticker: Ticker symbol, used in the plot title.
        s_interval: Unused by this function.
        df_stock: DataFrame with a ``'5. adjusted close'`` column; its index
            is used for the x-axis and to derive the forecast dates.

    Returns:
        None. If unknown arguments are supplied, a message is printed and the
        function returns before fitting anything.
    """
    parser = argparse.ArgumentParser(
        prog='arima',
        description="""In statistics and econometrics, and in particular in time
                                     series analysis, an autoregressive integrated moving average (ARIMA) model
                                     is a generalization of an autoregressive moving average (ARMA) model. Both
                                     of these models are fitted to time series data either to better understand
                                     the data or to predict future points in the series (forecasting).
                                     ARIMA(p,d,q) where parameters p, d, and q are non-negative integers, p is
                                     the order (number of time lags) of the autoregressive model, d is the degree
                                     of differencing (the number of times the data have had past values subtracted),
                                     and q is the order of the moving-average model."""
    )

    parser.add_argument('-d',
                        "--days",
                        action="store",
                        dest="n_days",
                        type=check_positive,
                        default=5,
                        help='prediction days.')
    parser.add_argument('-i',
                        "--ic",
                        action="store",
                        dest="s_ic",
                        type=str,
                        default='aic',
                        choices=['aic', 'aicc', 'bic', 'hqic', 'oob'],
                        help='information criteria.')
    parser.add_argument('-s',
                        "--seasonal",
                        action="store_true",
                        default=False,
                        dest="b_seasonal",
                        help='Use weekly seasonal data.')
    parser.add_argument('-o',
                        "--order",
                        action="store",
                        dest="s_order",
                        type=str,
                        help='arima model order (p,d,q) in format: pdq.')
    parser.add_argument('-r',
                        "--results",
                        action="store_true",
                        dest="b_results",
                        default=False,
                        help='results about ARIMA summary flag.')

    (ns_parser, l_unknown_args) = parser.parse_known_args(l_args)

    if l_unknown_args:
        print(
            f"The following args couldn't be interpreted: {l_unknown_args}\n")
        return

    adj_close = df_stock['5. adjusted close']

    # Machine Learning model
    if ns_parser.s_order:
        # "pdq" string -> (p, d, q); one digit per term.
        t_order = tuple(int(digit) for digit in ns_parser.s_order)
        model = ARIMA(adj_close.values, order=t_order).fit()
        # Observations are zero-indexed, so the first out-of-sample step is
        # index len(series); forecast exactly n_days steps from there.
        n_obs = len(adj_close)
        l_predictions = model.predict(start=n_obs,
                                      end=n_obs + ns_parser.n_days - 1)
        order_label = t_order
    else:
        auto_kwargs = {
            'error_action': 'ignore',
            'seasonal': ns_parser.b_seasonal,
            # pmdarima's keyword is `information_criterion`; the previous
            # spelling never reached the model search.
            'information_criterion': ns_parser.s_ic,
        }
        if ns_parser.b_seasonal:
            auto_kwargs['m'] = 5  # 5 trading days per week
        model = pmdarima.auto_arima(adj_close.values, **auto_kwargs)
        l_predictions = model.predict(n_periods=ns_parser.n_days)
        order_label = model.order

    # Prediction data
    l_pred_days = get_next_stock_market_days(
        last_stock_day=adj_close.index[-1],
        n_next_days=ns_parser.n_days)
    df_pred = pd.Series(l_predictions, index=l_pred_days, name='Price')

    if ns_parser.b_results:
        print(model.summary())
        print("")

    # Plotting
    plt.plot(df_stock.index, adj_close, lw=2)
    plt.title(
        f"ARIMA {order_label} on {s_ticker} - {ns_parser.n_days} days prediction"
    )
    plt.xlim(df_stock.index[0],
             get_next_stock_market_days(df_pred.index[-1], 1)[-1])
    plt.xlabel('Time')
    plt.ylabel('Share Price ($)')
    # The visibility flag is passed positionally: its keyword name differs
    # between matplotlib versions (`b` was renamed to `visible`).
    plt.grid(True, which='major', color='#666666', linestyle='-')
    plt.minorticks_on()
    plt.grid(True, which='minor', color='#999999', linestyle='-', alpha=0.2)
    # Dashed connector from the last observation to the first forecast.
    plt.plot([df_stock.index[-1], df_pred.index[0]],
             [adj_close.values[-1], df_pred.values[0]],
             lw=1,
             c='tab:green',
             linestyle='--')
    plt.plot(df_pred.index, df_pred, lw=2, c='tab:green')
    # Shade the forecast window and mark where it begins.
    plt.axvspan(df_stock.index[-1],
                df_pred.index[-1],
                facecolor='tab:orange',
                alpha=0.2)
    _, _, ymin, ymax = plt.axis()
    plt.vlines(df_stock.index[-1],
               ymin,
               ymax,
               linewidth=1,
               linestyle='--',
               color='k')
    plt.show()

    # Print prediction data
    print("Predicted share price:")
    df_pred = df_pred.apply(lambda x: f"{x:.2f} $")
    print(df_pred.to_string())
    print("")
Exemplo n.º 9
0
import matplotlib.pyplot as plt  
import statsmodels.api as sm  
import seaborn as sb
from scipy import stats
sb.set_style('darkgrid')
from pmdarima import auto_arima 
from statsmodels.tsa.arima_model import ARIMA
from sklearn.metrics import mean_absolute_error,mean_squared_error
import math

# Import the CSV, index it by date and keep a single column (position 3),
# which is referenced below as 'Close'.
stock_data = pd.read_csv('C:/Users/Smit/Dataset/yahoo/stockMarket.csv')
newdata = stock_data.set_index('Date')
newdata = newdata.iloc[:,3]
newdata = pd.DataFrame(newdata)

# Let auto_arima search non-seasonal orders with p and q capped at 3.
summary = auto_arima(newdata['Close'],start_p=0,
                     start_q=0,max_p=3,max_q=3,seasonal=False,trace=True)
summary.summary()

# Train on the first 1200 rows and hold out the rest. The hold-out frame is
# copied so that adding a column to it below does not operate on a slice of
# `newdata` (which triggers pandas' SettingWithCopyWarning).
trian = newdata.iloc[:1200]
test = newdata.iloc[1200:].copy()
# Zero-indexed positions of the first and last hold-out observations.
start = len(trian)
end = len(trian) + len(test) - 1
model_arima = ARIMA(trian['Close'],order=(0,1,0))
result_arima = model_arima.fit()
# typ='levels' requests forecasts on the original scale rather than differences.
prediction= result_arima.predict(start=start,end=end,typ='levels')
prediction= pd.DataFrame(prediction)

test['prediction'] = prediction.values
test.plot()

mae = mean_absolute_error(prediction,test['Close'])
Exemplo n.º 10
0
sm.graphics.tsa.plot_pacf(diff_stock_data_train.values.squeeze(),
                          lags=40,
                          ax=ax[1])  # modify not to generatoe graph write

# Parameter search (estimate parameters)
# Automatic diagnosis check - ARIMA
# ARIMA model fitting
# The (p, d, q) order of the model gives the number of AR parameters,
# differences, and MA parameters to use.
# With d fixed to 1 and stepwise=False, every (p, q) combination within the
# bounds below is tried rather than a stepwise walk.
auto_arima_model = auto_arima(stock_data_train,
                              start_p=1,
                              start_q=1,
                              max_p=3,
                              max_q=3,
                              seasonal=False,
                              d=1,
                              trace=True,
                              error_action='ignore',
                              suppress_warnings=True,
                              stepwise=False)

summary = auto_arima_model.summary()

# With return_conf_int=True, predict() returns (forecasts, conf_int), where
# conf_int has one row per forecast step: column 0 is the lower bound and
# column 1 the upper bound.
prediction = auto_arima_model.predict(len(stock_data_test),
                                      return_conf_int=True)

predicted_value = prediction[0]
predicted_lb = prediction[1][:, 0]
predicted_ub = prediction[1][:, 1]
predict_index = list(stock_data_test.index)
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Created on Wed Jun 24 19:31:50 2020

@author: Max
"""

import pmdarima as pm
import yfinance as yf
import pandas as pd

# Download GOOG price history for 2015-2018.
# NOTE(review): `adjusted=True` is passed to yf.download as-is; confirm the
# installed yfinance version accepts this keyword.
df = yf.download('GOOG',
                 start='2015-01-01',
                 end='2018-12-31',
                 adjusted=True,
                 progress=False)

# Resample to weekly frequency, keeping the last row of each week, and select
# the adjusted close (renamed to `adj_close`).
goog = df.resample('W').last() \
    .rename(columns={'Adj Close':'adj_close'}).adj_close

# Non-seasonal order search with stepwise=False and approximation=False;
# n_jobs=-1 requests parallel fitting (presumably on all available cores).
model = pm.auto_arima(goog,
                      error_action='ignore',
                      suppress_warnings=True,
                      seasonal=False,
                      stepwise=False,
                      approximation=False,
                      n_jobs=-1)

print(model.summary())
Exemplo n.º 12
0
def train_models(train,
                 models,
                 forecast_len,
                 full_df=None,
                 seasonality="infer_from_data",
                 in_sample=None,
                 freq=None,
                 GPU=None):
    """Train every requested forecasting model on ``train``.

    Args:
        train: Training series; passed directly to each model (and through
            the ``prophet_dataframe`` / ``gluonts_dataframe`` adapters).
        models: Iterable of model names to train, e.g. ``"ARIMA"``,
            ``"Prophet"``, ``"HWAAS"``, ``"HWAMS"``, ``"PYAF"``,
            ``"Gluonts"``, ``"NBEATS"``, ``"TATS"``, ``"TBAT"``,
            ``"TBATS1"``, ``"TBATP1"``, ``"TBATS2"``. Unknown names are
            ignored.
        forecast_len: Forecast horizon handed to the models that need it at
            training time (PYAF, Gluonts, NBEATS).
        full_df: Full dataset, used only by NBEATS.
        seasonality: Forwarded to ``select_seasonality``.
        in_sample: Truthy for in-sample evaluation; this changes the NBEATS
            data preparation and training length and the progress message.
        freq: ``"D"`` enables daily seasonality in Prophet.
        GPU: Truthy to train Gluonts and NBEATS on the GPU, otherwise CPU.

    Returns:
        Tuple ``(models_dict, seasons)``: the trained model (or, for PYAF, its
        forecast; for NBEATS, a dict of artifacts) keyed by model name, and
        the seasonal period returned by ``select_seasonality``.
    """

    seasons = select_seasonality(train, seasonality)

    periods = select_seasonality(train, 'periodocity')

    models_dict = {}
    for m in models:
        if in_sample:
            print(
                "Model {} is being trained for in sample prediction".format(m))
        else:
            print("Model {} is being trained for out of sample prediction".
                  format(m))
        if m == "ARIMA":
            models_dict[m] = pm.auto_arima(train, seasonal=True, m=seasons)
        if m == "Prophet":
            if freq == "D":
                model = Prophet(daily_seasonality=True)
            else:
                model = Prophet()
            models_dict[m] = model.fit(prophet_dataframe(train))
        if m == "HWAAS":
            # Prefer a Box-Cox transformed fit; fall back to the raw scale
            # when that fit fails.
            try:
                models_dict[m] = ExponentialSmoothing(
                    train,
                    seasonal_periods=seasons,
                    trend='add',
                    seasonal='add',
                    damped=True).fit(use_boxcox=True)
            except Exception:
                models_dict[m] = ExponentialSmoothing(
                    train,
                    seasonal_periods=seasons,
                    trend='add',
                    seasonal='add',
                    damped=True).fit(use_boxcox=False)
        if m == "HWAMS":
            # Same fallback chain, ending in an additive model without trend.
            try:
                models_dict[m] = ExponentialSmoothing(
                    train,
                    seasonal_periods=seasons,
                    trend='add',
                    seasonal='mul',
                    damped=True).fit(use_boxcox=True)
            except Exception:
                try:
                    models_dict[m] = ExponentialSmoothing(
                        train,
                        seasonal_periods=seasons,
                        trend='add',
                        seasonal='mul',
                        damped=True).fit(use_boxcox=False)
                except Exception:
                    models_dict[m] = ExponentialSmoothing(
                        train,
                        seasonal_periods=seasons,
                        trend=None,
                        seasonal='add').fit(use_boxcox=False)

        # if m=="HOLT":
        #   models_dict["HOLT"] = Holt(train,exponential=True).fit()
        if m == "PYAF":
            model = autof()
            model.train(iInputDS=train.reset_index(),
                        iTime='Date',
                        iSignal='Target',
                        iHorizon=len(train))  # bad coding to have horizon here
            # For PYAF the stored value is the forecast, not the model.
            models_dict[m] = model.forecast(iInputDS=train.reset_index(),
                                            iHorizon=forecast_len)
        if m == "Gluonts":
            # Use a local name so the `freq` argument (read by the Prophet
            # branch) is not overwritten for later iterations.
            inferred_freq = pd.infer_freq(train.index)
            gluon_freq = "M" if inferred_freq == "MS" else inferred_freq
            estimator = DeepAREstimator(
                freq=gluon_freq,
                prediction_length=forecast_len,
                # Honour the GPU flag instead of always requesting a GPU.
                trainer=Trainer(epochs=6,
                                ctx='gpu' if GPU else 'cpu'))
            models_dict[m] = estimator.train(
                training_data=gluonts_dataframe(train))
        if m == "NBEATS":

            if GPU:
                device = torch.device('cuda')
            else:
                device = torch.device('cpu')

            # Remove any checkpoint left over from a previous run.
            if os.path.isfile(CHECKPOINT_NAME):
                os.remove(CHECKPOINT_NAME)
            batch_size = 10  # greater than 4 for viz
            if in_sample:
                x_train, y_train, x_test, y_test, net, norm_constant = nbeats_dataframe(
                    full_df, forecast_len, in_sample=True, device=device)
                optimiser = optim.Adam(net.parameters())
                data = data_generator(x_train, y_train, batch_size)
                for _ in range(35):
                    train_100_grad_steps(data, device, net, optimiser)
                models_dict[m] = {}
                models_dict[m]["model"] = net
                models_dict[m]["x_test"] = x_test
                models_dict[m]["y_test"] = y_test
                models_dict[m]["constant"] = norm_constant

            else:  # out of sample: train on the full dataframe

                x_train, y_train, net, norm_constant = nbeats_dataframe(
                    full_df, forecast_len, in_sample=False, device=device)

                optimiser = optim.Adam(net.parameters())
                data = data_generator(x_train, y_train, batch_size)
                for _ in range(5):
                    train_100_grad_steps(data, device, net, optimiser)
                models_dict[m] = {}
                models_dict[m]["model"] = net
                models_dict[m]["tuple"] = (x_train, y_train, net,
                                           norm_constant)

        # if m=="TBA":
        #   bat = TBATS(use_arma_errors=False,use_box_cox=True)
        #   models_dict[m] = bat.fit(train)
        if m == "TATS":
            bat = TBATS(seasonal_periods=list(
                get_unique_N(season_list(train), 1)),
                        use_arma_errors=False,
                        use_trend=True)
            models_dict[m] = bat.fit(train)
        if m == "TBAT":
            bat = TBATS(use_arma_errors=False,
                        use_box_cox=True,
                        use_trend=True)
            models_dict[m] = bat.fit(train)
        if m == "TBATS1":
            bat = TBATS(seasonal_periods=[seasons],
                        use_arma_errors=False,
                        use_box_cox=True,
                        use_trend=True)
            models_dict[m] = bat.fit(train)
        if m == "TBATP1":
            bat = TBATS(seasonal_periods=[periods],
                        use_arma_errors=False,
                        use_box_cox=True,
                        use_trend=True)
            models_dict[m] = bat.fit(train)
        if m == "TBATS2":
            bat = TBATS(seasonal_periods=list(
                get_unique_N(season_list(train), 2)),
                        use_arma_errors=False,
                        use_box_cox=True,
                        use_trend=True)
            models_dict[m] = bat.fit(train)

        # if m=="ProphetGluonts":
        #   freqed = pd.infer_freq(train.index)
        #   if freqed=="MS":
        #     freq= "M"
        #   else:
        #     freq= freqed
        #   models_dict["ProphetGluonts"] = ProphetPredictor(freq=freq, prediction_length=forecast_len) #use_feat_dynamic_real=True
        #   models_dict["ProphetGluonts"] = list(models_dict["ProphetGluonts"])

    return models_dict, seasons
Exemplo n.º 13
0
for key, value in births1_adf[4].items():
    print('\t%s: %.3f' % (key, value))
'''
ADF Statistic: -0.331281
p-value: 0.920956
Critical Values:
	1%: -3.474
	5%: -2.880
	10%: -2.577'''
# p-value = 0.920956 (> 0.05): we fail to reject the null hypothesis, so the data is treated as non-stationary
# H0: the data is not stationary

# Apply auto_arima to forecast
from pmdarima import auto_arima

births_mod = auto_arima(births)
births_mod.summary()
'''
                               SARIMAX Results                                
==============================================================================
Dep. Variable:                      y   No. Observations:                  168
Model:               SARIMAX(2, 1, 1)   Log Likelihood                -271.935
Date:                Fri, 26 Mar 2021   AIC                            551.870
Time:                        00:35:55   BIC                            564.342
Sample:                             0   HQIC                           556.932
                                - 168                                         
Covariance Type:                  opg                                         
==============================================================================
                 coef    std err          z      P>|z|      [0.025      0.975]
------------------------------------------------------------------------------
ar.L1          0.2509      0.095      2.643      0.008       0.065       0.437
Exemplo n.º 14
0
def auto_arima_model(test_data):
    """Fit an auto-ARIMA model to the ``"orders"`` entry of ``test_data``.

    The search caps differencing at ``max_d=1``, suppresses warnings, and
    raises on fitting errors (``error_action="raise"``).
    """
    search = pmdarima.auto_arima
    orders = test_data["orders"]
    options = {
        "max_d": 1,
        "suppress_warnings": True,
        "error_action": "raise",
    }
    return search(orders, **options)
Exemplo n.º 15
0
from pmdarima import auto_arima

# Ignore harmless warnings
import warnings

warnings.filterwarnings("ignore")

# Fit auto_arima function to AirPassengers dataset.
# The series is monthly (m=12) and one seasonal difference is requested
# (D=1), so the seasonal search must be enabled; with seasonal=False the
# seasonal arguments (m, start_P, D) have no effect and only a plain ARIMA
# is considered.
stepwise_fit = auto_arima(
    airline['#Passengers'],
    start_p=1,
    start_q=1,
    max_p=3,
    max_q=3,
    m=12,
    start_P=0,
    seasonal=True,
    d=None,  # let auto_arima choose the non-seasonal differencing order
    D=1,
    trace=True,
    error_action='ignore',  # we don't want to know if an order does not work
    suppress_warnings=True,  # we don't want convergence warnings
    stepwise=True)  # set to stepwise

# To print the summary
stepwise_fit.summary()
# Split data into train / test sets
train = airline.iloc[:len(airline) - 12]
test = airline.iloc[len(airline) - 12:]  # set one year(12 months) for testing

# Fit a SARIMAX(0, 1, 1)x(2, 1, 1, 12) on the training set
Exemplo n.º 16
0
    "Turnover_std_lag3", "Trades_mean_lag3", "Trades_std_lag3",
    "High_mean_lag7", "High_std_lag7", "Low_mean_lag7", "Low_std_lag7",
    "Volume_mean_lag7", "Volume_std_lag7", "Turnover_mean_lag7",
    "Turnover_std_lag7", "Trades_mean_lag7", "Trades_std_lag7",
    "High_mean_lag30", "High_std_lag30", "Low_mean_lag30", "Low_std_lag30",
    "Volume_mean_lag30", "Volume_std_lag30", "Turnover_mean_lag30",
    "Turnover_std_lag30", "Trades_mean_lag30", "Trades_std_lag30", "month",
    "week", "day", "day_of_week"
]

# Fit an ARIMA (auto-regressive integrated moving average) model with
# exogenous features to forecast VWAP.
# ARIMA uses the series' own lagged values and lagged forecast errors to
# predict future values.

# auto_arima searches candidate orders on the training data; trace=True
# prints each candidate as it is tried.
model = auto_arima(df_train.VWAP,
                   exogenous=df_train[exogenous_features],
                   trace=True,
                   error_action="ignore",
                   suppress_warnings=True)
# NOTE(review): the model is fitted again here on the same training data and
# exogenous features that were passed to auto_arima; confirm this extra fit
# is needed.
model.fit(df_train.VWAP, exogenous=df_train[exogenous_features])

# Forecast one value per validation row, supplying that row's exogenous features.
forecast = model.predict(n_periods=len(df_valid),
                         exogenous=df_valid[exogenous_features])
df_valid["Forecast_ARIMAX"] = forecast
## Result: pick the model with the lowest AIC
#%%
# Plot the actual VWAP against the forecast
df_valid[["VWAP", "Forecast_ARIMAX"]].plot(figsize=(14, 7))
#%%
# Root-mean-squared error of the forecast on the validation set
print("RMSE of Auto ARIMAX:",
      np.sqrt(mean_squared_error(df_valid.VWAP, df_valid.Forecast_ARIMAX)))
print("\nMAE of Auto ARIMAX:",
Exemplo n.º 17
0
        tp, tn, fp, fn = 0, 0, 0, 0
        accurracy_matrix_df_for_chunk_iteration = pd.DataFrame(
            columns=["TP", "FN", "FP", "TN"])

        ########################
        # ARIMA
        ########################

        current_train_list = dict_of_chunk_series_with_test_and_train_and_forecast[
            chunk][chunk_iteration]["TRAIN_LIST_MEDIAN"]
        current_test_list = dict_of_chunk_series_with_test_and_train_and_forecast[
            chunk][chunk_iteration]["TEST_LIST_MEDIAN"]

        try:
            arima = pm.auto_arima(current_train_list,
                                  seasonal=False,
                                  suppress_warnings=True,
                                  error_action='ignore')
            forecast_arima = pd.Series(arima.predict(TEST),
                                       index=[*range(TRAIN, TRAIN + TEST, 1)],
                                       name="forecast_list_arima")
            dict_of_chunk_series_with_test_and_train_and_forecast[chunk][
                chunk_iteration]["FORECAST_LIST_ARIMA"] = forecast_arima

            runningtime = round(((time.time() - starttime) / 60), 5)
            print('Chunk ' + str(j) + ' (ID: ' + str(chunk) + ') iteration ' +
                  str(chunk_iteration) + ': Completed ARIMA. Running time ' +
                  str(runningtime) + ' min.')

            # extract threshold series
            threshold_high_for_test_list = dict_of_chunk_series_with_test_and_train_and_forecast[
                chunk][chunk_iteration]["THRESHOLD_HIGH_FOR_TEST_LIST"]
Exemplo n.º 18
0
#> 70.63707081783771

# Predict with the linear model over the whole frame and plot the result
# together with the train and test series.
y_ = my_lm_model.predict(my_df[['x']])
tmp = pd.DataFrame(y_, index=my_df.index)
plt.plot(my_train.y, label='train')
plt.plot(my_test.y, label='test')
plt.plot(tmp, label='model')
plt.legend()

### 12.2.3 Time-series forecasting with SARIMA

#### 12.2.3.1 Building the model

import pmdarima as pm

my_arima_model = pm.auto_arima(my_train.y, m=12, trace=True)
#> (omitted)
#> Best model:  ARIMA(1,1,0)(0,1,0)[12]
#> Total fit time: 0.838 seconds

#### 12.2.3.2 Forecasting

y_, my_ci = my_arima_model.predict(
    len(my_test),  # forecast horizon equals the length of the test data
    alpha=0.05,  # significance level (the default)
    return_conf_int=True)  # also return the confidence intervals
tmp = pd.DataFrame({
    'y': y_,
    'Lo': my_ci[:, 0],
    'Hi': my_ci[:, 1]
},
Exemplo n.º 19
0
plt.title('S&P/Case-Shiller U.S. National Home Price Index')
plt.ylabel('Index Jan 2000=100, Seasonally Adjusted')
plt.xlabel('Date')
plt.show()

# ## Information Criterion Method

# In[13]:

# Stepwise auto_arima search on `data` with the differencing order fixed at
# d=1, starting from p=0 / q=0, allowing p up to 6 and a total order of at
# most 6, with per-candidate tracing enabled.
# NOTE(review): seasonal=False is combined with D=1 -- presumably the
# seasonal differencing order has no effect in a non-seasonal search;
# confirm whether a seasonal fit was intended.
model = pm.auto_arima(data,
                      d=1,
                      D=1,
                      seasonal=False,
                      start_p=0,
                      start_q=0,
                      max_order=6,
                      test='adf',
                      max_p=6,
                      error_action='ignore',
                      suppress_warnings=True,
                      stepwise=True,
                      trace=True)

# In[31]:

# Fit a fixed-order ARIMA(2, 1, 1) and forecast 36 steps ahead.
# NOTE(review): freq='MS' presumably means month-start frequency -- confirm.
model_3 = ARIMA(data, order=(2, 1, 1), freq='MS')
model_fit3 = model_3.fit()
output_3 = model_fit3.forecast(steps=36, freq='MS')
# Append the new forecast as an extra column next to forecast_2, which is
# defined outside this excerpt.
forecast_3 = pd.concat([forecast_2, output_3], axis=1)
forecast_3.columns = [
    forecast_1.columns[0], 'Forecast (2,0,0)', 'Forecast (2,1,0)',
Exemplo n.º 20
0
print(__doc__)

# Author: Taylor Smith <*****@*****.**>

import pmdarima as pm
from pmdarima import model_selection
import numpy as np
from matplotlib import pyplot as plt

# #############################################################################
# Load the wineind series and hold out everything after the first 150 samples
data = pm.datasets.load_wineind()
train, test = model_selection.train_test_split(data, train_size=150)

# Run the auto_arima search on the training part (seasonal, period 12)
search_options = dict(
    seasonal=True,
    m=12,
    maxiter=10,
    trace=True,
    error_action='ignore',
    suppress_warnings=True,
)
arima = pm.auto_arima(train, **search_options)

# #############################################################################
# Draw the held-out samples as markers and the forecasts as a line
n_test = test.shape[0]
x = np.arange(n_test)
plt.scatter(x, test, marker='x')
forecasts = arima.predict(n_periods=n_test)
plt.plot(x, forecasts)
plt.title('Actual test samples vs. forecasts')
plt.show()
Exemplo n.º 21
0
# Use ARIMA
data = df.sort_index(ascending=True, axis=0)

# The first 987 rows are used for training, the remainder for validation.
train = data[:987]
valid = data[987:]

training = train['Close']
validation = valid['Close']

# auto_arima returns the selected model already fitted on `training`, so it
# is not fitted a second time.
model = auto_arima(training,
                   start_p=1,
                   start_q=1,
                   max_p=3,
                   max_q=3,
                   m=12,
                   start_P=0,
                   seasonal=True,
                   d=1,
                   D=1,
                   trace=True,
                   error_action='ignore',
                   suppress_warnings=True)

# Forecast exactly one value per validation row instead of a hard-coded
# count, so the forecast always matches `valid.index` in length.
forecast = model.predict(n_periods=len(valid))
# numpy.asarray drops any index the forecast may carry, so the values are
# assigned positionally under `valid.index` rather than re-aligned by label.
forecast = pandas.DataFrame(numpy.asarray(forecast),
                            index=valid.index,
                            columns=['Prediction'])

rms = numpy.sqrt(
    numpy.mean(
Exemplo n.º 22
0
with similar functionality.

auto_arima() uses a stepwise approach to search multiple combinations of p,d,q parameters 
and chooses the best model that has the least AIC.
"""
# pip install pmdarima  (the package was formerly published as pyramid-arima)
import pmdarima as pm

model = pm.auto_arima(
    ss0,
    start_p=1,
    start_q=1,
    max_p=3,
    max_q=3,
    m=1,  # frequency of series
    test='adf',  # use the ADF test to find the optimal `d`
    d=None,  # let model determine `d`
    seasonal=False,
    start_P=0,
    D=0,
    stepwise=True,
    trace=True,
    error_action='ignore',
    # The keyword is spelled `suppress_warnings`; a misspelled name is not
    # the warning switch and would be forwarded as an extra fit argument.
    suppress_warnings=True)
print(model.summary())

#%% 13. How to interpret the residual plots in ARIMA model
model.plot_diagnostics()
# looks like API of  statsmodels.tsa.arima.model  is used (new, not .arima_model - old)

#%% do some prediction
fc, confint = model.predict(n_periods=15, return_conf_int=True)
Exemplo n.º 23
0
def autoarima(data, Pre_day):
    """Run a stepwise, seasonal (``m=12``) auto_arima search on ``data``.

    NOTE(review): only the fitting call is visible in this excerpt.
    ``Pre_day`` is not used in the visible lines -- presumably it is the
    number of periods to forecast; confirm against the rest of the function.
    """
    stepwise_fit = pm.auto_arima(data, m=12, seasonal=True, error_action='ignore', suppress_warnings=True,
                                 stepwise=True)
Exemplo n.º 24
0
def auto_arima_forecast(series,
                        validation_series,
                        horizon,
                        del_outliers=False,
                        normalize=False,
                        plot=False):
    """
    Fits an auto arima model from the series to find the best parameters. Performance of the trained model is assessed
    on a validation series.

    :param series: training series the model is searched and fitted on
    :param validation_series: held-out series; its index labels the forecast and it must hold ``horizon`` observations
    :param horizon: number of periods to forecast past the end of ``series``
    :param del_outliers: if True, ``series`` is passed through ``remove_outliers`` before fitting
    :param normalize: if True, the training series is scaled with ``normalize_series`` and the forecast is mapped back
        with the returned scaler's ``inverse_transform``
    :param plot: if True, plot the last 100 training points, the validation series and the forecast
    :return: SMAPE for the validation series, the forecast validation series, order, seasonal_order
    :raises ValueError: if the forecast length does not match the length of ``validation_series``
    """
    # Local import keeps this function self-contained.
    import numpy as np

    # whether to remove outliers in the training series
    working_series = remove_outliers(series) if del_outliers else series

    # whether to normalize the training series
    scaler = None
    if normalize:
        scaler, working_series = normalize_series(working_series)

    # perform search for best parameters and fit (weekly seasonality, m=7);
    # the returned model is already fitted, so no extra fit call is needed
    model = auto_arima(working_series,
                       seasonal=True,
                       max_D=2,
                       m=7,
                       trace=True,
                       error_action='ignore',
                       suppress_warnings=True,
                       stepwise=True)

    model_params = model.get_params()
    order = model_params['order']
    seasonal_order = model_params['seasonal_order']

    # Perform predictions. The result is coerced to a plain ndarray so that
    # `.reshape` below is always available and the values are stored by
    # position instead of being re-aligned on whatever index the forecast
    # may carry.
    f_autoarima = np.asarray(model.predict(n_periods=horizon))

    if len(f_autoarima) != len(validation_series):
        raise ValueError(
            "forecast length (%d) does not match validation_series length (%d)"
            % (len(f_autoarima), len(validation_series)))

    # dataframe which contains the result
    forecast_dataframe = pd.DataFrame(index=validation_series.index)

    if normalize:
        # if data was normalized, use the scaler to reverse the normalizing;
        # it receives a single-column 2-D array and the column is flattened
        denormalized_forecast = scaler.inverse_transform(
            f_autoarima.reshape(-1, 1))
        forecast_dataframe['forecast'] = [
            row[0] for row in denormalized_forecast
        ]
    else:
        # save the forecast in the dataframe
        forecast_dataframe['forecast'] = f_autoarima

    if plot:
        plt.figure(figsize=(10, 6))

        plt.plot(series[-100:], color="blue", linestyle="-")
        plt.plot(validation_series, color="green", linestyle="-")
        plt.plot(forecast_dataframe, color="red", linestyle="--")

        plt.legend(["Train series", "Validation series", "Predicted series"])

        plt.title("Validation of auto arima model")

        plt.show()

    return smape(validation_series, forecast_dataframe['forecast']
                 ), forecast_dataframe['forecast'], order, seasonal_order
Exemplo n.º 25
0
"""
for i in data.columns:
    print(f"Jarque Bera for {i}")
    print(stats.jarque_bera(data[i].dropna()))
    print(f"Augmented Dickey-Fuller for {i}")
    print(adfuller(data[i].dropna()))
"""
# Determine the models by AIC
# One auto_arima model is stored per column of `data`, keyed by column name.
modelos = dict()
for i in data.columns:

    print(f"Modelo arima para {i}"
          f"##############################################"
          f"##############################################")
    # Run auto_arima on this column with its missing values dropped
    modelos[i] = auto_arima(data[i].dropna())  # every search option is left at its default

    # To print the summary
    print(modelos[i].summary())
"""
modelos_AR = dict()
for i in data.columns:

    print(f"Modelo arima para {i}"
          f"##############################################"
          f"##############################################")
    # Fit auto_arima function to AirPassengers dataset
    modelos_AR[i] = ARIMA(data[i].dropna(), order = (1,0,0)).fit()  # set to stepwise

    # To print the summary
    print(modelos_AR[i].summary())
Exemplo n.º 26
0
plot_pacf(train_data)
plt.show()
# Grid-search ARMA orders up to (3, 3) and keep the pair with the lowest AIC.
(p, q) = (sm.tsa.arma_order_select_ic(train_data, max_ar=3, max_ma=3,
                                      ic='aic')['aic_min_order'])
print(p, q)

data = np.array(train_dataset.values).T[0]
# Non-seasonal stepwise search ranked by BIC.
# No parallel-jobs argument is passed: `njob` is not an auto_arima parameter
# (an unknown keyword is forwarded as an extra fit argument), and the
# stepwise search evaluates candidates sequentially anyway.
fittedmodel = auto_arima(
    train_data,
    start_p=1,
    start_q=1,
    max_p=3,
    max_q=3,
    max_d=3,
    max_order=None,
    seasonal=False,
    m=1,
    test='adf',
    trace=False,
    error_action='ignore',  # don't want to know if an order does not work
    suppress_warnings=True,  # don't want convergence warnings
    stepwise=True,  # set to stepwise
    information_criterion='bic')
print(fittedmodel.summary())


def plot_arima(truth,
               forecasts,
               title="ARIMA",
               xaxis_label='Time',
Exemplo n.º 27
0
def test_r_equivalency(dataset, m, kwargs, expected_order, expected_seasonal):
    """Check that auto_arima picks the expected orders for ``dataset``.

    The search runs with seasonal period ``m``, tracing on and warnings
    suppressed. ``kwargs`` is accepted but not forwarded to the search.
    """
    selected = pm.auto_arima(dataset, suppress_warnings=True, trace=1, m=m)
    assert selected.order == expected_order
    # Only the first three seasonal terms are compared.
    seasonal_terms = selected.seasonal_order[:3]
    assert seasonal_terms == expected_seasonal
Exemplo n.º 28
0
q = list(train['Value'])
# Round-trip through an array so the list holds plain Python scalars.
out_seq = array(q)
q = out_seq.tolist()
print(q)
q.append(1)
#val=list(p[['Software Bug']].values())
#print(val)
# First 48 observations for training, the 49th as the single test point.
train, test = q[:48], q[48:49]
# NOTE(review): the seasonal-differencing check uses period 4 while the
# search below uses m=12 -- confirm which period is intended.
print(pmdarima.arima.nsdiffs(train, 4, max_D=9))
print(train)
forplot = []
# =============================================================================
# `p` and `q` are not auto_arima parameters (unknown keywords are forwarded
# as extra fit arguments), so they are not passed; the AR/MA search range is
# expressed with start_p/start_q and max_p/max_q.
stepwise_model = auto_arima(train,
                            m=12,
                            d=2,
                            start_p=0,
                            start_q=0,
                            max_q=12,
                            max_p=12,
                            error_action='ignore',
                            suppress_warnings=True)
# auto_arima already returns the model fitted on `train`; re-fitting on the
# same data is unnecessary.
model = stepwise_model

for i in range(len(test)):

    q1 = model.predict(n_periods=1)
    train.append(q1)
    print(train)
Exemplo n.º 29
0
def test_value_errors(endog, kwargs):
    """auto_arima must raise ``ValueError`` for the supplied ``kwargs``."""
    expect_value_error = pytest.raises(ValueError)
    with expect_value_error:
        pm.auto_arima(endog, **kwargs)
Exemplo n.º 30
0
##########################################################
# Load the data and split it into separate pieces
# (the bare expressions below only display values in an interactive session)
data = pm.datasets.load_lynx()
data
data.shape
# First 90 observations for training, the rest for testing
train, test = data[:90], data[90:]
train.shape, test.shape
# Fit a simple auto_arima model
# NOTE(review): seasonal=True is requested without a seasonal period `m`,
# together with D=10 / max_D=10 -- presumably the seasonal settings have no
# effect at the default period; confirm the intended `m` and `D`.
tsmodel = pm.auto_arima(train,
                        start_p=1,
                        start_q=1,
                        start_P=1,
                        start_Q=1,
                        max_p=5,
                        max_q=5,
                        max_P=5,
                        max_Q=5,
                        seasonal=True,
                        stepwise=True,
                        suppress_warnings=True,
                        D=10,
                        max_D=10,
                        error_action='ignore')
tsmodel
# Create predictions for the future, evaluate on test
# (one forecast per test observation, plus confidence intervals)
preds, conf_int = tsmodel.predict(n_periods=test.shape[0],
                                  return_conf_int=True)
preds
conf_int
# Print the error:
test