Example #1
def order_selection(train,
                    test,
                    params,
                    loss_func=mean_squared_error,
                    **loss_kwargs):
    warnings.filterwarnings(
        "ignore")  # to ignore statsmodels warning for unconverged models
    best_score, best_params = float("inf"), None
    keys, values = zip(*params.items())
    grid = [dict(zip(keys, v)) for v in itertools.product(*values)]
    for cfg in grid:
        try:
            model_fit = SARIMAX(train, **cfg).fit()
        except Exception:
            continue
        else:
            yhat = model_fit.forecast(test.shape[0])
            loss = loss_func(test, yhat, **loss_kwargs)

        if loss < best_score:
            best_score, best_params = loss, cfg
            print(best_score)

    print('Best SARIMA%s Loss=%.3f' % (best_params, best_score))
    return best_params, best_score
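A minimal usage sketch for order_selection (not from the source): the synthetic monthly series and the parameter grid below are illustrative assumptions, and SARIMAX / mean_squared_error are assumed to be imported as in the module above.

import numpy as np
import pandas as pd
from sklearn.metrics import mean_squared_error

idx = pd.date_range('2015-01-01', periods=60, freq='MS')
series = pd.Series(100 + 10 * np.sin(2 * np.pi * idx.month / 12)
                   + np.random.normal(scale=2, size=60), index=idx)
train, test = series[:-12], series[-12:]

params = {
    'order': [(0, 1, 1), (1, 1, 1)],
    'seasonal_order': [(0, 1, 1, 12), (1, 1, 1, 12)],
}
best_params, best_score = order_selection(train, test, params,
                                          loss_func=mean_squared_error)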
def sarima_models_top_18():
    new_sarima_orders = [
        ((1, 1, 1), (1, 1, 1, 12)), ((1, 1, 1), (0, 1, 1, 12)),
        ((1, 1, 1), (0, 1, 1, 12)), ((1, 1, 1), (1, 1, 1, 12)),
        ((1, 1, 1), (0, 1, 1, 12)), ((1, 1, 1), (1, 1, 1, 12)),
        ((1, 1, 1), (0, 1, 1, 12)), ((0, 1, 1), (1, 1, 1, 12)),
        ((1, 1, 1), (0, 1, 1, 12)), ((1, 1, 1), (1, 1, 1, 12)),
        ((1, 1, 1), (1, 1, 1, 12)), ((1, 1, 1), (0, 1, 1, 12)),
        ((0, 1, 1), (0, 1, 1, 12)), ((1, 1, 1), (1, 1, 0, 12)),
        ((0, 1, 1), (1, 1, 1, 12)), ((1, 1, 1), (1, 1, 1, 12)),
        ((1, 1, 1), (1, 1, 1, 12)), ((1, 1, 1), (0, 1, 1, 12))
    ]

    codes = [
        60804, 60085, 60110, 60104, 60505, 60651, 60073, 60436, 60120, 60165,
        60160, 60641, 60432, 46327, 60633, 46324, 60099, 46394
    ]

    data = load_data_top_27()

    forecasts = {}
    for i, code in enumerate(codes):
        model = SARIMAX(data.loc[:, code],
                        order=new_sarima_orders[i][0],
                        seasonal_order=new_sarima_orders[i][1],
                        enforce_invertibility=False,
                        enforce_stationarity=False).fit()
        forecasts[code] = model.forecast(steps=12).values

    return forecasts
Example #3
def process_data6():
    series = pd.read_excel('../../Data/Styrene-Net Industry Average 2010-2015.xlsx', header=0,
                           index_col=0, parse_dates=True)
    series.index.freq = 'MS'

    data = series.copy()

    actuals = pd.read_excel('../../Data/Styrene-Net Industry Average 2015-2018 Actuals.xlsx',
                            header=0, index_col=0, parse_dates=True)

    actuals.index.freq = 'MS'

    #Test ranges
    data = data['2010-01-01':]

    model = SARIMAX(np.log(data['Styrene']), order=(1,1,2), seasonal_order=(0,0,1,12), enforce_invertibility = False, exog = data[['Oil_Lag', 'Gas_Lag']]).fit()

    #auto_arima(data['Styrene'], seasonal=True, m=12, enforce_invertibility = False, exog = data[['Oil_Lag']]).summary()

    preds = []

    for i in actuals.index:
        row = pd.DataFrame(actuals.loc[i, :]).T
        # Repeat the current exogenous values over the 6-step forecast horizon,
        # indexed by the next 6 months so the forecast keeps a datetime index
        exog_future = pd.DataFrame(
            np.repeat(row[['Oil_Lag', 'Gas_Lag']].values, 6, axis=0),
            columns=['Oil_Lag', 'Gas_Lag'],
            index=pd.date_range(i, periods=6, freq='MS'))

        yhat_log = model.forecast(steps=6, exog=exog_future)
        yhat = np.exp(yhat_log.iloc[[5]])  # keep only the 6-step-ahead value
        preds.append(yhat)

        # Append the realised observation and refit on the extended sample
        data = pd.concat([data, row], axis=0)
        model = SARIMAX(np.log(data['Styrene']), order=(1, 1, 2),
                        seasonal_order=(0, 0, 1, 12),
                        enforce_invertibility=False,
                        exog=data[['Oil_Lag', 'Gas_Lag']]).fit()

    df = pd.DataFrame({'timestamp': [p.index[0] for p in preds],
                       'value': [round(p.iloc[0], 2) for p in preds]})
    df['timestamp'] = pd.to_datetime(df['timestamp'])
    df.to_csv('../../Data/Results.csv', index = False)
Example #4
class SARIMAModel(SMModel):
    type = [ModelType.CONTINUOUS_PRICE, ModelType.UNIVARIATE]
    name = 'statsmodels.arima'
    default_params = {'order': (1, 1, 1)}

    @with_params
    def fit(self, x, **kwargs):
        params = kwargs.get('params')
        try:
            self.model = SARIMAX(x, order=params['order']) \
                    .fit(disp=params.get('disp',0))
            return self.model
        except (ValueError, np.linalg.LinAlgError):
            logger.error('ARIMA convergence error (order {} {} {})'.format(
                params['order'][0], params['order'][1], params['order'][2]))
            return None

    def predict(self, x, **kwargs):
        if not self.model:
            return None
        try:
            forecast = self.model.forecast(steps=x.shape[0])
            return to_discrete_double(forecast, -0.01, 0.01)
        except (ValueError, np.linalg.LinAlgError):
            logger.error('ARIMA convergence error (order {} {} {})'.format(
                self.params['order'][0], self.params['order'][1],
                self.params['order'][2]))

    @with_x
    def get_grid_search_configs(self, **kwargs):
        x_train = kwargs.get('x_train')
        x_test = kwargs.get('x_test')

        p_values = range(0, 6)
        d_values = range(0, 6)
        q_values = range(0, 6)
        # If the series is stationary, don't apply differencing
        adf = adfuller(x_train)  # index 0 is the test statistic, index 1 is the p-value
        if adf[1] < 0.05:  # Null hypothesis rejected: series is stationary, no differencing needed
            logger.info('Series is stationary, no need for differencing')
            d_values = [0]  # Set d = 0
        # Get all possible configs
        configs = []
        for p in p_values:
            for d in d_values:
                for q in q_values:
                    configs.append({
                        'params': {
                            'order': (p, d, q)
                        },
                        'x_train': x_train,
                        'x_test': x_test
                    })
        return configs
Example #5
 def f_ARIMA(self, O_Train, O_Test, order1, seasonal_order1):
     ar_model = SARIMAX(O_Train,
                        order=order1,
                        seasonal_order=seasonal_order1).fit()
     #pred = ar_model.predict(start=O_Test.index[0], end=O_Test.index[-1])
     p1 = O_Test.reset_index()
     pre = pd.DataFrame(ar_model.forecast(len(O_Test)))
     pre.reset_index(drop=True, inplace=True)
     pred1 = pd.concat([p1['Date'], pre], axis=1)
     pred1.columns = ['Date', 'pred']
     pred1 = pred1.set_index('Date')
     pred = pred1['pred']
     return pred
Example #6
def sarima_prediction(data, pollutant, p, q, length=1):
    seasonality = 7
    if pollutant == 'O3':
        d = 1
    else:
        d = 0
    order_arima = (p, d, q)
    order_sarima = (1, d, 1, seasonality)
    fit = SARIMAX(np.asarray(data),
                  order=order_arima,
                  seasonal_order=order_sarima,
                  initialization='approximate_diffuse').fit()
    # Forecast `length` steps into the future (one step by default)
    return fit.forecast(length)
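A hypothetical call of sarima_prediction on a synthetic daily series; the pollutant name, the ARMA orders, and the data below are assumptions, not from the source.

import numpy as np

rng = np.random.default_rng(0)
daily_no2 = 20 + 5 * np.sin(2 * np.pi * np.arange(120) / 7) + rng.normal(size=120)
next_value = sarima_prediction(daily_no2, pollutant='NO2', p=1, q=1, length=1)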
Example #7
class SARIMA_regressor(BaseEstimator, RegressorMixin):
    """Uses a SARIMAX model in a sklearn compatible regressor"""
    def __init__(
        self,
        endog_col,
        exog_cols,
        order,
        seasonal_order,
        measurement_error=True,
    ):
        """

        Parameters
        ----------
        endog_col : str
            Column in X with the endogenous data
        exog_cols : list
            Columns in X with the exogenous data
        order : tuple
            (p, d, q) for the ARIMA part
        seasonal_order : tuple
            (P, D, Q, s) for the seasonal part
        measurement_error : bool, optional
            Whether endog_col is observed with measurement error, by default True
        """
        self.endog_col = endog_col
        self.exog_cols = exog_cols
        self.order = order
        self.seasonal_order = seasonal_order
        self.measurement_error = measurement_error

    def fit(self, X, y=None):
        self.model = SARIMAX(
            X[self.endog_col],
            exog=X[self.exog_cols],
            order=self.order,
            seasonal_order=self.seasonal_order,
            measurement_error=self.measurement_error,
        ).fit()

        return self

    def predict(self, X):
        """
        Parameters
        ----------
        X : array-like
            Exogenous data for the forecast horizon; its length sets the number of periods forecast after the end of the data used in `fit()`
        """
        return self.model.forecast(X.shape[0], exog=X[self.exog_cols])
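An illustrative sketch of using SARIMA_regressor as a sklearn-style estimator; the column names, orders, and synthetic data are assumptions, not from the source.

import numpy as np
import pandas as pd

idx = pd.date_range('2018-01-01', periods=48, freq='MS')
rng = np.random.default_rng(1)
X = pd.DataFrame({'demand': 100 + rng.normal(scale=5, size=48),
                  'temp': 15 + 10 * np.sin(2 * np.pi * idx.month / 12)}, index=idx)

reg = SARIMA_regressor(endog_col='demand', exog_cols=['temp'],
                       order=(1, 1, 1), seasonal_order=(0, 1, 1, 12))
reg.fit(X.iloc[:36])
y_hat = reg.predict(X.iloc[36:])  # 12-step forecast using the held-out exog values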
Example #8
def model_sarima(df, steps, kwargs):
    exog_to_train, exog_to_test = None, None
    if 'fourier' in kwargs and kwargs['fourier']:
        exog_to_train, exog_to_test = _get_fourier_terms(df, steps)

    # train
    try:
        model = SARIMAX(df,
                        order=kwargs['order'],
                        seasonal_order=kwargs['seasonal_order'],
                        exog=exog_to_train)
        model = model.fit(disp=-1)
    except Exception:
        return None
    # predict
    return model.forecast(steps, exog=exog_to_test).reset_index(drop=True)
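An assumed call pattern for model_sarima: the synthetic monthly series and the settings dict below are illustrative, and 'fourier' is left False so the module-level helper _get_fourier_terms is not needed.

import numpy as np
import pandas as pd

idx = pd.date_range('2016-01-01', periods=48, freq='MS')
series = pd.Series(50 + 5 * np.sin(2 * np.pi * idx.month / 12)
                   + np.random.normal(size=48), index=idx)
settings = {'order': (1, 1, 1), 'seasonal_order': (0, 1, 1, 12), 'fourier': False}
forecast = model_sarima(series, steps=12, kwargs=settings)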
Example #9
    def DomesticModelMaking(self):
        self.Domesticdata = self.Domesticdata.set_index('InvoiceDate')

        sar = SARIMAX(
            self.Domesticdata['AvgNetFare'],
            order=(6, 2, 4),
            seasonal_order=(6, 2, 4, 12),
            trend='n',
        )
        sar = sar.fit()
        pred = sar.forecast(steps=30)
        pred = pd.DataFrame(pred, columns=['AvgNetFare'])

        predDomestic = pd.DataFrame(self.Domesticdata['AvgNetFare'])
        predDomestic = pd.concat([predDomestic, pred])

        predDomestic.to_csv('PredictedDomesticDataset.csv')
Example #10
def test_autoreg_predict_forecast_equiv(reset_randomstate):
    e = np.random.normal(size=1000)
    nobs = e.shape[0]
    idx = pd.date_range("2020-1-1", freq="D", periods=nobs)
    for i in range(1, nobs):
        e[i] = 0.95 * e[i - 1] + e[i]
    y = pd.Series(e, index=idx)
    m = AutoReg(y, trend="c", lags=1, old_names=False)
    res = m.fit()
    a = res.forecast(12)
    b = res.predict(nobs, nobs + 11)
    c = res.forecast("2022-10-08")
    assert_series_equal(a, b)
    assert_series_equal(a, c)
    sarimax_res = SARIMAX(y, order=(1, 0, 0), trend="c").fit(disp=False)
    d = sarimax_res.forecast(12)
    pd.testing.assert_index_equal(a.index, d.index)
Example #11
def predict_next_sales(best_params, dataset):
    order, sorder, trend = best_params
    model = SARIMAX(dataset,
                    order=order,
                    seasonal_order=sorder,
                    trend=trend,
                    enforce_stationarity=False,
                    enforce_invertibility=False)
    model = model.fit(disp=False)

    predictions = list()
    # split dataset
    train, test = train_test_split(dataset, num_test)
    # seed history with training dataset
    history = [x for x in train]
    # step over each time-step in the test set
    for i in range(len(test)):
        # fit model and make forecast for history
        yhat = sarima_forecast(history, best_params)
        # store forecast in list of predictions
        predictions.append(yhat)
        # add actual observation to history for the next loop
        history.append(test[i])
    # estimate prediction error

    # prints and saves the accuracy of the final model
    error_2019 = measure_rmse(test, predictions)
    print("Estimated RMSE is ", error_2019)
    plt.close()
    plt.plot(predictions)
    plt.plot(history[-len(test):])
    plt.savefig("Final Model 2018 estimated sales.png")
    plt.close()

    predictions = model.forecast(3)
    print("Predictions are")
    print(predictions)

    # plot bar graph of predictions
    predictions.plot.bar()
    plt.savefig("2019 Forecast Bar Chart.png")
    plt.close()

    # plot line graph of predictions
    predictions.plot()
    plt.savefig("2019 Forecast Line plot.png")
def arima_best(fh, train, val, p_range, d_range, q_range, loss_metric="MSE"):
    '''
    fh : int. Forecast horizon. While validation set can be longer than
            the forecast horizon, only the fh portion of the validation set
            will be used to calculate score/loss, instead of forecasting the
            entire length of the validation set. This is to keep consistent with
            the actual use purpose of the model which will be to predict only
            the selected forecast horizon.
    p_range: tuple of 2
    d_range: tuple of 2
    q_range: tuple of 2
    '''
    # Hyperparameter tuning
    #print("Tuning p, d, q:")
    #print("-"*50)
    # true values to be scored against
    true = val[:fh]
    min_loss = float("inf")
    best_model = None
    best_p = best_d = best_q = None
    for p in range(*p_range):
        for d in range(*d_range):
            for q in range(*q_range):
                model = SARIMAX(train,
                                order=(p, d, q),
                                seasonal_order=(4, 1, 2, 8),
                                enforce_stationarity=False,
                                enforce_invertibility=False,
                                trend=None).fit(maxiter=100, method="powell")
                # make prediction
                predictions = model.forecast(fh)
                loss = loss_func(loss_metric, tensor=False)(true, predictions)
                if loss < min_loss:
                    min_loss = loss
                    best_model = model
                    best_p = p
                    best_d = d
                    best_q = q
                    #print(f"{p}, {d}, {q}: Validation {loss_metric} ", round(min_loss, 4), end="\r")
    #print("-"*50)
    #return (best_p, best_d, best_q)
    return best_model, (best_p, best_d, best_q)
def arima_evaluate(model, test, fh=8, refit=pd.Series(), metric=MAPE):
    '''
    model : SARIMAX model.
    test : pd Time series. Test data set.
    fh : int. Forecast horizon.
    refit : pd Time series. New time series data to refit the model on.
    '''
    if not refit.empty:
        params = model.params  # store previous parameters
        p_d_q = (model.model.k_ar_params, model.model.k_diff,
                 model.model.k_ma_params)
        model = SARIMAX(refit,
                        order=p_d_q,
                        enforce_stationarity=False,
                        enforce_invertibility=False,
                        trend=None).fit(params, maxiter=1000)
    pred = model.forecast(steps=fh)  # Forecast values
    true = test[:fh]  # true values
    loss = metric(pred.array, true.array)
    return pred, true, loss
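A hedged sketch of chaining arima_best and arima_evaluate; the synthetic series is an assumption, and loss_func / MAPE are helpers from the surrounding module that are not shown here.

import numpy as np
import pandas as pd

rng = np.random.default_rng(2)
series = pd.Series(100 + np.cumsum(rng.normal(size=120)))
train, val, test = series[:-16], series[-16:-8], series[-8:]
model, (p, d, q) = arima_best(fh=8, train=train, val=val,
                              p_range=(0, 2), d_range=(0, 2), q_range=(0, 2))
pred, true, loss = arima_evaluate(model, test, fh=8)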
Example #14
def sarimax_forecast(df):
    '''Takes a dataframe, splits it into train/forecast sets based on the
    availability of price, and forecasts the electricity price for the next hour.
    Returns a forecast dataframe ('price', 'lower_interval', 'upper_interval')
    and a historical price dataframe ('price').'''

    # split past and future
    past = df[~df.price.isnull()]
    future = df[df.price.isnull()].drop('price', axis=1)
    # forecast for next time point only
    future = future.iloc[:1, :]
    if future.temp.isnull().iloc[0]:  # weather forecast data is not available for that hour
        forecast = np.nan
        lower = np.nan
        upper = np.nan
        print('weather data is not available')
    else:
        past.index = pd.DatetimeIndex(past.index.values,
                                      freq=past.index.inferred_freq)
        # Build Model
        sarima = SARIMAX(past.price,
                         exog=past.drop('price', axis=1),
                         order=(1, 1, 1),
                         seasonal_order=(1, 0, 2, 7))
        sarima = sarima.fit(maxiter=300)
        # forecasting
        results = sarima.get_forecast(1, exog=future, alpha=0.05)
        forecast = sarima.forecast(1, exog=future, alpha=0.05)
        lower = results.conf_int()['lower price'][0]
        upper = results.conf_int()['upper price'][0]

    # create forecast df with datetimeIndex
    forecast = pd.DataFrame(dict(price=forecast,
                                 lower_interval=lower,
                                 upper_interval=upper),
                            index=future.index)
    forecast.index.name = 'date_time'
    past = past.iloc[-1:, 0]
    return forecast, past
Example #15
def sarimax_forecast(hour=11):
    '''hour: hour of a day, range(0, 23),
    returns forecast, upper_intervals, lower_intervals, mape, mase, test, train'''

    df_all = get_data(hour=hour)

    # split past and future
    past = df_all[~df_all.price.isnull()]
    future = df_all[df_all.price.isnull()].drop('price', axis=1)

    future = future.iloc[:1, :]
    if future.temp.isnull().iloc[0]:
        forecast = np.array([np.nan])
        confidence_int = pd.DataFrame(
            {
                'lower price': np.nan,
                'upper price': np.nan
            }, index=['x'])

    else:
        past.index = pd.DatetimeIndex(past.index.values,
                                      freq=past.index.inferred_freq)
        # Build Model
        sarima = SARIMAX(past.price,
                         past.drop('price', axis=1),
                         order=(1, 1, 1),
                         seasonal_order=(1, 0, 2, 7))
        sarima = sarima.fit(maxiter=300)
        # forecasting
        results = sarima.get_forecast(1, exog=future, alpha=0.05)
        forecast = sarima.forecast(1, exog=future, alpha=0.05)
        confidence_int = results.conf_int()
    # create forecast df with datetimeIndex
    lower = confidence_int['lower price'][0]
    upper = confidence_int['upper price'][0]
    forecast = pd.DataFrame(dict(price=forecast, lower=lower, upper=upper),
                            index=future.index)
    past = past.iloc[-1:, 0]
    return forecast, past
Example #16
def predictionArima(df):
    start_time = time.time()
    window = pd.DataFrame(columns=[
        'Current test', 'Current prediction', 'MSE',
        'Glycemia prediction RMSE (mg/dl)', 'PSW',
        'Prediction Horizon (minutes)'
    ])

    for n in PSW:
        for v in range(0, inter):

            interval = (v + 1) * 15
            windo = n / 12

            for x in range((len(df) - n - v)):

                #print(v, x)
                train = df.iloc[x:n + x]
                test = df.iloc[n + x:n + x + v + 1]

                model = SARIMAX(train,
                                order=orderArima,
                                enforce_stationarity=False,
                                enforce_invertibility=False).fit()

                #pred = result.predict(start= n+x, end= n+x+v, exog= test['sugarValue'])
                pred = model.forecast(steps=v + 1)
                pred = pred.values
                #model = SARIMAX(df['sugarValue'], order=(0, 1, 3), seasonal_order=(0, 0, 0, 12), enforce_invertibility=False).fit()
                #pred = result.predict(n, n+v)

                window = app(window, train, test['sugarValue'], pred, interval,
                             windo)


    print("--- %s Seconds for computation ---" % (time.time() - start_time))
    return window
Example #17
    def sarimaParaSelect(self, classNo, trainLabel, testLabel, useAic=False):
        dataLength = len(trainLabel)
        data = pd.Series(trainLabel)
        for i in range(0, dataLength):
            data[i] = log(data[i] + 1)
        index = self.dtIndex[0:dataLength]
        data.index = pd.Index(index)

        minBias = 99999.0
        minAic = 99999.0
        (ar, ma) = (0, 0)
        label = array(testLabel)
        for p, q in [(1, 1), (0, 1), (1, 2), (2, 0), (2, 1), (2, 2)]:
            try:
                model = SARIMAX(data,
                                order=(p, 1, q),
                                seasonal_order=(0, 1, 1, 7)).fit()
                output = array(model.forecast(len(testLabel)))
                for i in range(0, len(testLabel)):
                    output[i] = exp(output[i]) - 1
                bias = math.sqrt(
                    sum((output - label) * (output - label)) / len(testLabel))
                if (bias < minBias
                        and (not useAic or model.aic < minAic)):
                    (ar, ma) = (p, q)
                    minBias = bias
                    minAic = model.aic
                    bestOutput = output
            except Exception:
                pass

        if (minBias < 90000.0):
            self.ParaChoose[classNo] = (ar, ma)
            return ((ar, ma), bestOutput)
        else:
            raise ValueError
Example #18
                      enforce_invertibility=False).fit()
agile_model.summary()

# just to deactivate PyCharm/NumPy type-checker warnings
# noinspection PyTypeChecker
agile_model_pred = np.exp(
    agile_model.predict(start=test_first_date,
                        end=test_last_date,
                        dynamic=True,
                        typ='levels'))

print(f'MAPE {np.round(mean_abs_pct_error(test_data,agile_model_pred),2)}%')
# print(f'MAE:{np.round(mean_absolute_error(test_data,agile_model_pred),2)}')

# noinspection PyTypeChecker
agile_model_forecast = np.exp(agile_model.forecast(steps=2))
print(agile_model_forecast)


def plot_prediction(training_data, agile_model, agile_model_pred,
                    original_data):
    model_data = training_data.values[1:].reshape(-1) - agile_model.resid[1:]
    model_data = pd.concat((model_data, agile_model_pred))
    plt.figure(figsize=(16, 6))
    plt.plot(model_data)
    plt.plot(original_data[1:])
    plt.legend(['Model Forecast', 'Original Data'])
    plt.show()


plot_prediction(train_data, agile_model, agile_model_pred, df['Last'])
                  n_jobs=1,
                  station)

auto = auto.fit(xTrain)
pred = auto.predict(len(xTest))

mean_squared_error(xTest, pred)
np.sqrt(mean_squared_error(xTest, pred))
"""Use of SARIMAX"""

sar = SARIMAX(
    xTrain,
    order=(6, 2, 4),
    seasonal_order=(6, 2, 4, 1),
    trend='n',
)
sar = sar.fit()

pred = sar.forecast(steps=len(xTest))

print(mean_squared_error(xTest, pred))
print(np.sqrt(mean_squared_error(xTest, pred)))

import pickle
# Saving model to disk
pickle.dump(sar, open('model.pkl', 'wb'))

# Loading model to compare the results
model = pickle.load(open('model.pkl', 'rb'))
print(model.predict(start='2020-01-01', end='2020-01-01'))  # predict for a specific date (assumes a date-indexed series)
Example #20
# bestModel:             SARIMAX(0, 1, 1)x(1, 1, 1, 52)

# Best model found by the grid search
best_model = SARIMAX(df_day_train.tmax,
                     order=(0, 1, 1),
                     seasonal_order=(1, 1, 1, 52)).fit(disp=-1)

# tsa.plot_acf(best_model.resid[13:].values.squeeze(), lags=48,)
# # The plot below checks the residuals: they look normally distributed with no remaining lag effects.
# best_model.plot_diagnostics(lags=30, figsize=(16, 12))
# df_month2 = df_month_test[['tmax']]
# best_model.predict()  # set the start and end dates here
# the invboxcox function restores the original scale of a Box-Cox transformed series
# df_month2['forecast'] = invboxcox(best_model.forecast(steps=5), lmbda)
# Forecast the next 500 periods
df_day2 = best_model.forecast(500)
# plt.figure(figsize=(15, 7))
# Plot the data
plt.plot(df_day2)
df_day_train.tmax.plot(color='r', ls='--', label='Origin')
# Save the figure
plt.savefig('长春week.png')
plt.show()

# Compute the RMSE
# Slice the forecast
df_day2 = df_day2['20-':'2013']
# print(np.sqrt(sum((df_day2-ts)**2)/ts.size))

# save = pd.DataFrame(df_day2, columns = ['data', 'tmax'])
# Save the forecast data
Example #21
def train_sarima(data=False,
                 hour=11,
                 split_date='2019-10-22 11:00:00',
                 n=30,
                 exog=False):
    '''hour: hour of a day, range(0, 23),
    split_date: date on which the data is split into train and test,
    n: number of days to forecast,
    exog: for SARIMAX, a tuple of (list of exog features, order, seasonal_order)
    returns forecast, upper_intervals, lower_intervals, mape, mase, test, train'''

    if isinstance(data, bool):
        if isinstance(exog, bool):
            df = get_daily(hour=hour)
        else:
            df = get_all(hour=hour)
    else:
        df = data
    # formatting split_date
    split_date = pd.DatetimeIndex(np.array([split_date]))
    # get train and test for plotting only
    train = df[(df.index <= split_date[0])]
    test = df[(df.index > split_date[0]) & \
                      (df.index <= (split_date + pd.Timedelta(days=n))[0])]
    # will collect following information from forecast
    forecasts = []
    upper = []
    lower = []
    # loop over to get walk forward forecast for n days
    for i in range(1, n + 1):
        # walk one day forward to set train_set
        predict_date = df[df.index == split_date[0]].index + pd.Timedelta(
            days=i)
        train_set = df[df.index < predict_date[0]]
        train_set.index = pd.DatetimeIndex(train_set.index.values,
                                           freq=train_set.index.inferred_freq)
        # Build Model without exogenous features
        if isinstance(exog, bool):
            sarima = SARIMAX(train_set,
                             order=(1, 1, 1),
                             seasonal_order=(1, 0, 2, 7))
            sarima = sarima.fit(maxiter=200)
            # Forecast
            results = sarima.get_forecast(1, alpha=0.05)
            forecast = sarima.forecast(1, alpha=0.05)
            confidence_int = results.conf_int()
        # Build Model with exogenous features
        else:
            # StandardScaling the exogenous features
            # scaler = StandardScaler()
            # scaler = scaler.fit(train_set[['wind_speed', 'temp', 'humidity']])
            # train_set.loc[:,['wind_speed', 'temp', 'humidity']]=\
            # scaler.transform(train_set[['wind_speed', 'temp', 'humidity']])
            # training model
            sarima = SARIMAX(train_set.price,
                             exog=train_set[exog[0]],
                             order=exog[1],
                             seasonal_order=exog[2])
            sarima = sarima.fit(maxiter=200)
            # get features for forecast
            exog_fore = test[test.index == predict_date[0]][exog[0]]
            # scaling features for forecast
            # exog_fore.loc[:,['wind_speed', 'temp', 'humidity']]=\
            # scaler.transform(exog_fore[['wind_speed', 'temp', 'humidity']])
            # forecasting
            results = sarima.get_forecast(1, exog=exog_fore, alpha=0.05)
            forecast = sarima.forecast(1, exog=exog_fore, alpha=0.05)
            confidence_int = results.conf_int()
        # add forecast result into the list
        lower.append(confidence_int['lower price'][0])
        upper.append(confidence_int['upper price'][0])
        forecasts.append(forecast[0])

    # calculate the mape
    mape = get_mape(test.price, forecasts)
    mase = get_mase(test.price, forecasts, train.price)
    # create forecast df with datetimeIndex
    forecast = pd.DataFrame(forecasts, index=test.index, columns=['price'])

    return forecast, lower, upper, mape, mase, train, test
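An assumed invocation of train_sarima, matching its documented signature; it relies on the module's get_daily/get_all data loaders, so the call is a sketch rather than a self-contained example.

forecast, lower, upper, mape, mase, train, test = train_sarima(
    hour=11, split_date='2019-10-22 11:00:00', n=30)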
# Open the file
df = pd.read_csv('长春.csv', encoding='utf-8')
# Load the date column as the time index
df.ds = pd.to_datetime(df.date)
df.index = df.ds
# Cast the average temperature column to double
df['平均气温'] = df['平均气温'].astype('double')
# Plot the data before training
df.drop(['date'], axis=1, inplace=True)
df.平均气温.plot(color='r', ls='--', label='Origin')
plt.show()
# Resample to daily means
df_day = df.resample('D').mean()
# Split out the training data
df_day_train = df_day['2017-5-31':'2020-5-31']
# Best model found by the search
best_model = SARIMAX(df_day_train.平均气温, order=(1, 1, 1), seasonal_order=(1, 1, 1, 90)).fit(disp=-1)
# Forecast the next 90 periods
df_day2 = best_model.forecast(90)
# plt.figure(figsize=(15, 7))
# Plot the data
plt.plot(df_day2)
df_day_train.平均气温.plot(color='r', ls='--', label='Origin')
# Save the figure
plt.savefig('长春daytave.png')
plt.show()
# Slice the forecast
df_day2 = df_day2['2020-5-31':'2025-5-31']
# Save the forecast data
df_day2.to_csv('长春daytave.csv')
Example #23
AIC = []
BIC = []
error = []
label = []

for p in range(0, 3):
    for d in range(0, 3):
        for q in range(0, 3):
            for P in range(0, 3):
                for D in range(0, 3):
                    for Q in range(0, 3):
                        model_fit = SARIMAX(
                            training,
                            order=(p, d, q),
                            seasonal_order=(P, D, Q, 12),
                            enforce_stationarity=False,
                            enforce_invertibility=False).fit(disp=-1)
                        forecast = model_fit.forecast(len(testing))
                        label.append(
                            int(
                                str(p) + str(d) + str(q) + str(P) + str(D) +
                                str(Q) + str(12)))
                        error.append(mse(testing, forecast))
                        AIC.append(model_fit.aic)
                        BIC.append(model_fit.bic)
                        print('ARIMA:', p, d, q, 'Seasonal:', P, D, Q)
                        del model_fit
                        del forecast

# Convert the results into a dataframe using pandas
import pandas as pd

BIC = pd.DataFrame(np.asarray(BIC).reshape(729, 1))
Example #24
    def fit_sarimax(self):

        # sarimax= auto_arima(y=self.data_lag[["fallecimientos"]],
        #                    exogenous=self.data_lag[["casos_total"]],
        #                    start_p=1, start_q=1,
        #                    test='adf',
        #                    max_p=2, max_q=2, m=7,
        #                    start_P=0, seasonal=True,
        #                    d=None, D=1, trace=False,
        #                    error_action='ignore',
        #                    suppress_warnings=True,
        #                    stepwise=True)

        sarimax = SARIMAX(endog=self.data_lag.iloc[:-1, ][["fallecimientos"]],
                          exog=self.data_lag.iloc[:-1, ][["casos_total"]],
                          order=(0, 0, 3),
                          seasonal_order=(0, 0, 0, 0)).fit()

        model_summary = sarimax.summary()
        predictions = pd.DataFrame(
            sarimax.forecast(steps=5, exog=self.forecast[["casos_total"]]))

        e = pd.DataFrame({
            "Modelo":
            "SARIMAX",
            "Predicción de hoy": [predictions.iloc[0, 0]],
            "Error de hoy": [
                abs(predictions.iloc[0, 0] -
                    self.dt.loc[len(self.dt) - 1, "fallecimientos"])
            ]
        })

        predictions["fecha"] = self.dt.loc[len(self.dt) - 1, "fecha"]
        predictions.columns = ["fallecimientos", "fecha"]
        predictions.reset_index(drop=True, inplace=True)
        for i in range(len(self.forecast)):
            predictions.loc[i, "fecha"] = predictions.fecha[i] + timedelta(days=i)

        new = pd.concat(
            (self.dt[["fallecimientos", "fecha"]], predictions.iloc[1:, :]),
            axis=0)

        new["Predicciones"] = np.where(
            new.fecha <= self.dt.loc[len(self.dt) - 1, "fecha"], "Real",
            "Pred")

        fig = px.bar(
            new,
            x="fecha",
            y="fallecimientos",
            color="Predicciones",
        )

        # predictions.columns =["Predicciones_Fallecimientos", "fecha"]
        #
        # load = str(self.dt.loc[len(self.dt)-1, "fecha"] - timedelta(days=1))
        # load = load[0:10] + "_.pkl"
        #
        # with open(load, "rb") as file:
        #     historic = pickle.load(file)
        # predictions["Error"] = 0
        # p=pd.concat([predictions.reset_index(drop=True), historic], ignore_index=True)
        # p = p.loc[p.fecha <= self.dt.loc[len(self.dt)-1, "fecha"],:]
        # p.reset_index(drop=True, inplace=True)
        # for i in range(0,len(p)):
        #     if self.dt.loc[len(self.dt)-1,"fecha"] == p.loc[i,"fecha"]:
        #         p.loc[i,"Error"] = np.sqrt((self.dt.loc[len(self.dt)-1,"fallecimientos"] - p.loc[i,"Predicciones_Fallecimientos"])**2)
        #
        # save = str(self.dt.loc[len(self.dt)-1, "fecha"])
        # save = save[0:10] + "_.pkl"
        #
        # with open(save, "wb") as file:
        #     pickle.dump(p, file)

        return e, fig, model_summary
Example #25
#     if aic < best_aic:
#         best_model = model
#         best_aic = aic
#         best_param = parameters
#     results.append([parameters, model.aic])
#
# result_table = pd.DataFrame(results)
# result_table.columns = ['parameters', 'aic']
# print(result_table.sort_values(by='aic', ascending=True).head())
# print(best_model.summary())

# bestModel:             SARIMAX(0, 1, 1)x(1, 1, 1, 12)

best_model = SARIMAX(df_day_train.tmax, order=(0, 1, 1), seasonal_order=(1, 1, 1, 52)).fit(disp=-1)

# tsa.plot_acf(best_model.resid[13:].values.squeeze(), lags=48,)
# # The plot below checks the residuals: they look normally distributed with no remaining lag effects.
# best_model.plot_diagnostics(lags=30, figsize=(16, 12))
# df_month2 = df_month_test[['tmax']]
# best_model.predict()  # set the start and end dates here
# the invboxcox function restores the original scale of a Box-Cox transformed series
# df_month2['forecast'] = invboxcox(best_model.forecast(steps=5), lmbda)
df_day2 = best_model.forecast(1000)
# plt.figure(figsize=(15, 7))
plt.plot(df_day2)
df_day_train.tmax.plot(color='r', ls='--', label='Origin')
plt.show()

# Compute the MSE

Example #26
#print(predictions)

#compute the error metrics on the test set
error_test = check_error(compare_test_df['AveragePrice'],
                         compare_test_df['Predicted_AveragePrice'],
                         name_col='Value Comp. Pred.vs. Fit',
                         index_name='Testing Base')

print(' TEST and PREDICTION')
plot_compare_error(compare_test_df, len(compare_test_df) - 1)
print(error_test)

#dti = pd.date_range(data_index_max, periods=5, freq='W-SUN')
print("____________________________")
print("Forecast for one period")
print(model.forecast()[0])
#print("on")
#print(  dti[1] )
nstepsfor = int(15)
pred_uc = model.forecast(steps=nstepsfor)[0]

#print(pred_ci = pred_uc.conf_int())

print("CONFIDENCE INTERVALS")
print("____________________________")
print("Forecast for")
print(nstepsfor)

#for t in range(0,nstepsfor):
#    print(pred_uc[t])
        
        X_Test_CS.Country = le.fit_transform(X_Test_CS.Country)
        X_Test_CS['State'] = le.fit_transform(X_Test_CS['State'])
        
        X_Test_CS_Min_Date = X_Test_CS['Date'].min()
        X_Train_CS_Max_Date = X_Train_CS['Date'].max()


        #SARIMA Data
        model1 = SARIMAX(y1_Train_CS, order=(1,1,0), 
                        #seasonal_order=(1,1,0,12),
                        measurement_error=True).fit(disp=False)    
        model2 = SARIMAX(y2_Train_CS, order=(1,1,0), 
                        #seasonal_order=(1,1,0,12),
                        measurement_error=True).fit(disp=False)   
        y1_xpred = model1.forecast(X_Test_CS[X_Test_CS['Date'] > X_Train_CS_Max_Date].shape[0])
        y2_xpred = model2.forecast(X_Test_CS[X_Test_CS['Date'] > X_Train_CS_Max_Date].shape[0])
        
        train_confirmed_y1 = X_Train_CS[(X_Train_CS['Date'] >=  X_Test_CS_Min_Date)]['ConfirmedCases']
        train_confirmed_y2 = X_Train_CS[(X_Train_CS['Date'] >=  X_Test_CS_Min_Date)]['Fatalities']
        
        y1_xpred = np.concatenate((train_confirmed_y1,y1_xpred), axis = 0)
        y2_xpred = np.concatenate((train_confirmed_y2,y2_xpred), axis = 0)
        
        
        #Simple linear model without enhancing the data
        #After the transform they should roughly follow a linear regression trend
        X_Train_CS = X_Train_CS.loc[:, ['State', 'Country', 'Date']]
#        y1_Train_CS = y1_Train_CS.apply(lambda x: np.log1p(x))
#        y2_Train_CS = y2_Train_CS.apply(lambda x: np.log1p(x))
#        train_confirmed_y1 = train_confirmed_y1.apply(lambda x: np.log1p(x))
Example #28
            results = mod.fit()
            if results.aic < a:
                a = results.aic
                s = 'ARIMA{}x{} - AIC:{}'.format(param, param_seasonal, results.aic)
        except:
            continue
print(s)
'''
pdq = (0, 1, 1)
PDQ = (1, 1, 1, 4)
model_train = SARIMAX(train.REVENUE,
                      order=pdq,
                      seasonal_order=PDQ,
                      enforce_stationarity=False).fit()
predict_train = model_train.forecast(test_size + 1)

model_run = SARIMAX(df.REVENUE, order=pdq, seasonal_order=PDQ).fit()
predict_run = model_run.forecast(1)

#residual = predict_train - test
'''
print(model_train.summary())
model_train.plot_diagnostics()
'''
print(predict_run[0])
plt.plot(df.REVENUE, label='df', marker='o')
plt.plot(predict_train, label='SARIMA', marker='o', linestyle='--')
plt.plot(predict_run, label='SARIMA_RUN', marker='o')
plt.legend(loc='best')
Example #29
class SARIMAXModel(ModelStrategy):
    '''
    A class for a Seasonal Autoregressive Integrated Moving Average Model and the standard operations on it
    '''
    def __init__(self, hparams, log_dir=None):
        univariate = True
        model = None
        name = 'SARIMAX'
        self.auto_params = hparams.get('AUTO_PARAMS', False)
        self.trend_p = int(hparams.get('TREND_P', 10))
        self.trend_d = int(hparams.get('TREND_D', 2))
        self.trend_q = int(hparams.get('TREND_Q', 0))
        self.seasonal_p = int(hparams.get('SEASONAL_P', 5))
        self.seasonal_d = int(hparams.get('SEASONAL_D', 2))
        self.seasonal_q = int(hparams.get('SEASONAL_Q', 0))
        self.m = int(hparams.get('M', 12))
        super(SARIMAXModel, self).__init__(model,
                                           univariate,
                                           name,
                                           log_dir=log_dir)

    def fit(self, dataset):
        '''
        Fits a SARIMAX forecasting model
        :param dataset: A Pandas DataFrame with 2 columns: Date and Consumption
        '''
        if dataset.shape[1] != 2:
            raise Exception(
                'Univariate models require a dataset with exactly 2 columns: Date and Consumption.'
            )
        dataset.rename(columns={
            'Date': 'ds',
            'Consumption': 'y'
        },
                       inplace=True)
        series = dataset.set_index('ds')
        if self.auto_params:
            best_model = pmdarima.auto_arima(
                series,
                seasonal=True,
                stationary=False,
                m=self.m,
                information_criterion='aic',
                max_order=2 * (self.trend_p + self.trend_q),
                max_p=2 * self.trend_p,
                max_d=2 * self.trend_d,
                max_q=2 * self.trend_q,
                max_P=2 * self.seasonal_p,
                max_D=2 * self.seasonal_d,
                max_Q=2 * self.seasonal_q,
                error_action='ignore'
            )  # Automatically determine model parameters
            order = best_model.order
            seasonal_order = best_model.seasonal_order
            print("Best SARIMAX params: (p, d, q):", best_model.order,
                  " and  (P, D, Q, s):", best_model.seasonal_order)
        else:
            order = (self.trend_p, self.trend_d, self.trend_q)
            seasonal_order = (self.seasonal_p, self.seasonal_d,
                              self.seasonal_q, self.m)
        self.model = SARIMAX(series,
                             order=order,
                             seasonal_order=seasonal_order,
                             enforce_stationarity=True,
                             enforce_invertibility=True).fit()
        print(self.model.summary())
        return

    def evaluate(self, train_set, test_set, save_dir=None, plot=False):
        '''
        Evaluates performance of SARIMAX model on test set
        :param train_set: A Pandas DataFrame with 2 columns: Date and Consumption
        :param test_set: A Pandas DataFrame with 2 columns: Date and Consumption
        :param save_dir: Directory in which to save forecast metrics
        :param plot: Flag indicating whether to plot the forecast evaluation
        '''
        train_set.rename(columns={
            'Date': 'ds',
            'Consumption': 'y'
        },
                         inplace=True)
        test_set.rename(columns={
            'Date': 'ds',
            'Consumption': 'y'
        },
                        inplace=True)
        train_set = train_set.set_index('ds')
        test_set = test_set.set_index('ds')
        train_set["model"] = self.model.fittedvalues
        test_set["forecast"] = self.forecast(
            test_set.shape[0])['Consumption'].tolist()

        df_forecast = pd.concat([train_set, test_set]).rename(columns={'y': 'gt'})
        test_metrics = self.evaluate_forecast(df_forecast,
                                              save_dir=save_dir,
                                              plot=plot)
        return test_metrics

    def forecast(self, days, recent_data=None):
        '''
        Create a forecast for the test set. Note that this is different than obtaining predictions for the test set.
        The model makes a prediction for the provided example, then uses the result for the next prediction.
        Repeat this process for a specified number of days.
        :param days: Number of days into the future to produce a forecast for
        :param recent_data: A factual example for the first prediction
        :return: An array of predictions
        '''
        forecast_df = self.model.forecast(steps=days).reset_index(level=0)
        forecast_df.columns = ['Date', 'Consumption']
        return forecast_df

    def save(self, save_dir, scaler_dir=None):
        '''
        Saves the model to disk
        :param save_dir: Directory in which to save the model
        '''
        if self.model:
            model_path = os.path.join(save_dir,
                                      self.name + self.train_date + '.pkl')
            self.model.save(model_path)  # Serialize and save the model object

    def load(self, model_path, scaler_path=None):
        '''
        Loads the model from disk
        :param model_path: Path to saved model
        '''
        if os.path.splitext(model_path)[1] != '.pkl':
            raise Exception('Model file path for ' + self.name +
                            ' must have ".pkl" extension.')
        self.model = SARIMAXResults.load(model_path)
        return
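A hypothetical use of SARIMAXModel; the hyperparameter keys follow the hparams.get() calls in __init__, df is an assumed DataFrame with 'Date' and 'Consumption' columns, and the ModelStrategy base class comes from the surrounding project.

hparams = {'AUTO_PARAMS': False, 'TREND_P': 1, 'TREND_D': 1, 'TREND_Q': 1,
           'SEASONAL_P': 0, 'SEASONAL_D': 1, 'SEASONAL_Q': 1, 'M': 12}
model = SARIMAXModel(hparams)
model.fit(df)
forecast_df = model.forecast(days=30)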
        adjusted_y_train_fatalities = y_train_fatalities[
            idx:]  #.values.reshape(-1, 1)
        idx = X_pred[X_pred[feature_use] == 0].shape[0]
        adjusted_X_pred = X_pred[idx:][feature_use].values.reshape(-1, 1)

        pred_data = test[(test['Country_Region'] == country)
                         & (test['Province_State'] == province)]
        max_train_date = train[(train['Country_Region'] == country) & (
            train['Province_State'] == province)]['Date'].max()
        min_test_date = pred_data['Date'].min()
        model = SARIMAX(
            adjusted_y_train_confirmed,
            order=(1, 1, 0),
            #seasonal_order=(1,1,0,12),
            measurement_error=True).fit(disp=False)
        y_hat_confirmed = model.forecast(
            pred_data[pred_data['Date'] > max_train_date].shape[0])
        y_train_confirmed = train[(train['Country_Region'] == country)
                                  & (train['Province_State'] == province) &
                                  (train['Date'] >=
                                   min_test_date)]['ConfirmedCases'].values
        y_hat_confirmed = np.concatenate((y_train_confirmed, y_hat_confirmed),
                                         axis=0)

        model = SARIMAX(
            adjusted_y_train_fatalities,
            order=(1, 1, 0),
            #seasonal_order=(1,1,0,12),
            measurement_error=True).fit(disp=False)
        y_hat_fatalities = model.forecast(
            pred_data[pred_data['Date'] > max_train_date].shape[0])
        y_train_fatalities = train[(train['Country_Region'] == country)