示例#1
0
def q3_d():
    """Plot rolling one-step-ahead forecasts of log(WMT) against reality.

    Loads the ``HW5_WMT`` sheet through ``get_data`` and walks forward over
    the rows from ``2016-03-31`` up to, but not including, ``2020-03-31``.
    For each of those rows two models are refitted on every earlier
    observation of ``log(WMT)``:

    * ARIMA(0, 1, 1)
    * ARIMA(0, 1, 1) x (0, 1, 1, 4), labelled "AIRLINE" in the plot

    Each model contributes a one-step forecast; both forecast paths are
    plotted together with the realised log prices.  Returns ``None``.
    """
    print("begin")
    df = get_data("data/HW5_WMT.xlsx", "HW5_WMT")
    df.index = pd.to_datetime(df.index, format='%Y%m%d')
    log_wmt = np.log(df['WMT'])
    # These two columns are not used by the models; they are kept so the
    # frame printed below looks the same as before.
    df['first_difference'] = log_wmt - log_wmt.shift(1)
    df['season_difference'] = log_wmt - log_wmt.shift(4)

    start = df.index.get_loc('2016-03-31')
    df_test = df.iloc[start:]
    df_test = df_test.iloc[:df_test.index.get_loc('2020-03-31')]
    print(df_test)

    rst_arima_list = []
    rst_airline_list = []
    for step in range(len(df_test)):
        # Expanding window: everything strictly before the row being forecast.
        history = log_wmt.iloc[:start + step]
        arima_fit = ARIMA(history, order=(0, 1, 1)).fit()  # p=0, d=1, q=1
        airline_fit = ARIMA(history,
                            order=(0, 1, 1),
                            seasonal_order=(0, 1, 1, 4)).fit()
        # Read the forecast positionally: with pandas input the result is a
        # Series whose index is a date or an integer offset, so `[0]` would
        # be a label lookup rather than "first element".
        rst_arima_list.append(np.asarray(arima_fit.forecast())[0])
        rst_airline_list.append(np.asarray(airline_fit.forecast())[0])

    plt.plot(df_test.index, rst_arima_list, label='ARIMA Model')
    plt.plot(df_test.index, rst_airline_list, label='AIRLINE Model')
    np.log(df_test['WMT']).plot(label='Reality')
    plt.legend()
    plt.show()
示例#2
0
 def arimamodel(self,
                train,
                test,
                ar_param,
                order,
                ma_param,
                iterative=True):
     """Forecast ``len(test)`` values with an ARIMA(ar_param, order, ma_param).

     With ``iterative=True`` the model is refitted before every step on the
     training values plus all test observations seen so far, and a list of
     one-step forecasts is returned; progress is printed at each tenth of
     the test set.  With ``iterative=False`` the model is fitted once on
     ``train`` and the result of ``predict`` over the following
     ``len(test)`` positions is returned.
     """
     arima_order = (ar_param, order, ma_param)
     observed = list(train)
     n_test = len(test)

     if not iterative:
         fitted = ARIMA(observed, order=arima_order).fit()
         first = len(observed)
         return fitted.predict(start=first, end=first + n_test - 1)

     # Step indices at which a progress line is printed (10% ... 90%).
     checkpoints = [int(n_test / 10) * k for k in range(1, 10)]
     forecasts = []
     for step in range(n_test):
         fitted = ARIMA(observed, order=arima_order).fit()
         forecasts.append(fitted.forecast()[0])
         # Reveal the true value so the next refit can use it.
         observed.append(test[step])
         if step in checkpoints:
             progress = ': Arima Prediction {}0% Complete: {} out of {}'.format(
                 checkpoints.index(step) + 1, step, len(test))
             print(str(datetime.now()) + progress)
     return forecasts
示例#3
0
def q3_e():
    """Plot and score rolling one-step forecast errors for log(WMT).

    Loads the ``HW5_WMT`` sheet through ``get_data`` and walks forward over
    the rows from ``2016-03-31`` up to, but not including, ``2020-03-31``.
    For each of those rows an ARIMA(0, 1, 1) model and an
    ARIMA(0, 1, 1) x (0, 1, 1, 4) model (labelled "AIRLINE") are refitted on
    every earlier observation of ``log(WMT)`` and asked for a one-step
    forecast.  The forecast errors (actual minus forecast, in logs) are
    plotted and the mean squared error of each model is printed.
    Returns ``None``.
    """
    print("begin")
    df = get_data("data/HW5_WMT.xlsx", "HW5_WMT")
    df.index = pd.to_datetime(df.index, format='%Y%m%d')
    log_wmt = np.log(df['WMT'])
    # These two columns are not used by the models; they are kept so the
    # frame printed below looks the same as before.
    df['first_difference'] = log_wmt - log_wmt.shift(1)
    df['season_difference'] = log_wmt - log_wmt.shift(4)

    start = df.index.get_loc('2016-03-31')
    df_test = df.iloc[start:]
    df_test = df_test.iloc[:df_test.index.get_loc('2020-03-31')]
    print(df_test)

    rst_arima_list = []
    rst_airline_list = []
    for step in range(len(df_test)):
        # Expanding window: everything strictly before the row being forecast.
        history = log_wmt.iloc[:start + step]
        arima_fit = ARIMA(history, order=(0, 1, 1)).fit()  # p=0, d=1, q=1
        airline_fit = ARIMA(history,
                            order=(0, 1, 1),
                            seasonal_order=(0, 1, 1, 4)).fit()
        # Read the forecast positionally: with pandas input the result is a
        # Series whose index is a date or an integer offset, so `[0]` would
        # be a label lookup rather than "first element".
        rst_arima_list.append(np.asarray(arima_fit.forecast())[0])
        rst_airline_list.append(np.asarray(airline_fit.forecast())[0])

    # Forecast errors in log space: actual minus forecast.
    actual = np.log(df_test['WMT']).to_numpy()
    rst_arima_error_list = actual - np.asarray(rst_arima_list)
    rst_airline_error_list = actual - np.asarray(rst_airline_list)

    plt.plot(df_test.index, rst_arima_error_list, label='ARIMA ERROR')
    plt.plot(df_test.index, rst_airline_error_list, label='AIRLINE ERROR')
    plt.legend()
    plt.show()

    arima_mse = float(np.mean(np.square(rst_arima_error_list)))
    airline_mse = float(np.mean(np.square(rst_airline_error_list)))
    print("arima_mse = ", arima_mse)
    print("airline_mse = ", airline_mse)
    def get_forecast(self, finance_data):
        """Return ``(last observed low, one-step ARIMA forecast)``.

        Fits an ARIMA(5, 0, 1) on the whole ``'Low'`` column of
        ``finance_data`` and forecasts a single step ahead.
        """
        self._logger.info('Calculating forecast with the given data...')
        lows = finance_data['Low'].values
        # No hold-out here: the order is taken as already tuned, so the
        # complete series is used for fitting.
        fitted = ARIMA(lows, order=(5, 0, 1)).fit()
        next_low = fitted.forecast(steps=1)[0]

        # Pair the most recent real value with the prediction that follows it.
        return (lows[-1], next_low)
def get_forecast():
    """Return ``(last observed low, one-step ARIMA forecast)``.

    Pulls the data with ``get_finance_data()``, fits an ARIMA(5, 0, 1) on
    the whole ``'Low'`` column and forecasts a single step ahead.
    """
    lows = get_finance_data()['Low'].values

    # No hold-out here: the order is taken as already tuned, so the complete
    # series is used for fitting.
    fitted = ARIMA(lows, order=(5, 0, 1)).fit()
    next_low = fitted.forecast(steps=1)[0]

    # Pair the most recent real value with the prediction that follows it.
    return (lows[-1], next_low)
示例#6
0
def _find_optimal_model(train, val, test, data_props, examples):
    """Score every ARIMA order proposed by ``auto_arima`` on val and test.

    ``auto_arima`` is run on each row of ``train['y_data']``; the distinct
    orders it returns (first-seen order preserved) are the candidates.  Each
    candidate is then refitted per row and its one-step forecast is compared
    with the target:

    * ``'val'``:  fit on ``train`` rows, target is the ``val`` row
    * ``'test'``: fit on ``train`` and ``val`` rows joined along axis 1,
      target is the ``test`` row

    :param train: mapping with a 2-D ``'y_data'`` entry (one series per row)
    :param val: mapping whose ``'y_data'`` rows hold one target value each
    :param test: mapping whose ``'y_data'`` rows hold one target value each
    :param data_props: unused
    :param examples: unused
    :return: dict with one entry per candidate order, mapping
        ``'{split}_{metric}'`` to the mean of that metric
        (metrics: ``mae``, ``mda``, ``mse``, ``pos``), plus the top-level
        keys ``'{split}_best_score'`` and ``'{split}_best_param'``
    """
    candidate_orders = list(
        dict.fromkeys(
            auto_arima(row, trace=True).order for row in train['y_data']))

    # (split name, history rows, target rows)
    splits = [
        ('val', train['y_data'], val['y_data']),
        ('test',
         np.concatenate((train['y_data'], val['y_data']), axis=1),
         test['y_data']),
    ]

    val_results = {}
    for props in candidate_orders:
        val_results[props] = {'val': {}, 'test': {}}
        for split, X, y in splits:
            metrics = val_results[props][split]
            for i, history in enumerate(X):
                mod = ARIMA(history, order=props).fit()
                # `.item()` extracts the single value explicitly; calling
                # float() on a 1-element array is deprecated in NumPy.
                y_pred = float(np.asarray(mod.forecast()).item())
                y_true = float(np.asarray(y[i]).item())
                mae = abs(y_pred - y_true)
                # Direction hit: signs agree either exactly or after
                # rounding both values to 4 decimals.
                mda = int(
                    np.sign(y_pred) == np.sign(y_true) or np.sign(
                        np.round(y_pred, 4)) == np.sign(np.round(y_true, 4)))
                mse = (y_pred - y_true)**2
                pos = int(np.sign(np.round(y_true, 6)))
                for err, vale in zip(['mae', 'mda', 'mse', 'pos'],
                                     [mae, mda, mse, pos]):
                    metrics.setdefault(err, []).append(vale)

    final_results = {}
    for props in candidate_orders:
        final_results[props] = {}
        for split in ['val', 'test']:
            best_key = f'{split}_best_score'
            for err, vals in val_results[props][split].items():
                score = statistics.mean(vals)
                final_results[props][f'{split}_{err}'] = score
                # NOTE(review): every metric (mae, mda, mse, pos) competes
                # for the same running minimum, so "best_score" is the
                # smallest mean of *any* metric, and mda is a
                # higher-is-better measure.  Behaviour is preserved as-is;
                # confirm which metric is meant to drive the selection.
                if props != (0, 0, 0) and (
                        best_key not in final_results
                        or final_results[best_key] > score):
                    final_results[best_key] = score
                    final_results[f'{split}_best_param'] = props

    return final_results
示例#7
0
def arima(args):
    """Fit one ARIMA(5, 1, 5) per series and print the forecast MAPE.

    ``getData(args)`` supplies ``train`` and ``test``; both are transposed,
    then the first 121 rows of ``train`` are each fitted separately and
    forecast 744 steps ahead.  The stacked forecasts are compared with
    ``test`` (reshaped to ``(-1, 11, 11)``) through ``MAPE``.

    Previously the metric was computed on ``res``, which only held the
    forecast of the last fitted series; the stacked array was built but
    never used.
    """
    n_series = 121  # 11 x 11, matching the reshape of `test` below
    horizon = 744   # number of steps forecast per series

    train, test = getData(args)
    train = train.transpose((1, 0))
    test = test.transpose((1, 0))

    # Shape (n_series, horizon): one row of forecasts per fitted series.
    res = np.array([
        ARIMA(train[i], order=(5, 1, 5)).fit().forecast(horizon)
        for i in range(n_series)
    ])
    # Bring the forecasts into the same (step, 11, 11) layout as `test`.
    # NOTE(review): assumes axis 0 of the reshaped `test` is the time step
    # and the 121 series fill the 11 x 11 grid in row-major order - confirm
    # against getData.
    res = res.transpose((1, 0)).reshape((-1, 11, 11))

    # res = np.load('./save/arima_res.npy',allow_pickle=True)
    test = test.reshape((test.shape[0], 11, 11))
    print(res.shape)
    print(test.shape)
    print('ARIMA: ')
    # print('RMSE: {}'.format(RMSE(y_pred=res, y_true=test)))
    print('MAPE: {}'.format(MAPE(y_pred=res, y_true=test)))
示例#8
0
def get_arima(data):
    """Fit an ARIMA(1, 1, 1) on ``data`` and return its forecast as a list.

    The forecast covers ``DAYS_TO_PREDICT`` steps past the end of ``data``.
    """
    fitted = ARIMA(data, order=(1, 1, 1)).fit()
    return fitted.forecast(DAYS_TO_PREDICT).tolist()
示例#9
0
# Tail of a stationarity-test report followed by a sparse-coefficient ARIMA
# fit.  `output`, `t` and `data` are built earlier in the script.
# NOTE(review): `t[4]['10%']` looks like the critical-values entry of an ADF
# test result - confirm where `t` comes from.
output['value']['Critical Value(10%)'] = t[4]['10%']
# ts1 does not pass the ADF test here, but the time-series plot looks fairly
# stationary.  A few periods show large variance, so heteroscedasticity is
# suspected.
print(output)

# The LM test shows significant heteroscedasticity, but the textbook judges
# from experience that an ARIMA model is still applicable.
# Conjecture: the heteroscedasticity is caused by a few early periods, since
# the data is old and covers only a few periods.
# So ARIMA is taken to be applicable; with that settled, move on to plotting
# the autocorrelation and partial autocorrelation.
c = acorr_lm(data['ts1'].dropna())
print(c)

# lag_acf = acf(data['ts1'].dropna(), nlags=10,fft=False)
# lag_pacf = pacf(data['ts1'].dropna(), nlags=10, method='ols')
# fig, axes = plt.subplots(1,2, figsize=(20,5))
# plot_acf(data['ts1'].dropna(), lags=10, ax=axes[0])
# plot_pacf(data['ts1'].dropna(), lags=10, ax=axes[1], method='ols')
# plt.show(block=True)

# The sparse-coefficient model given in the book is ARIMA((1,4),1,0),
# but to me ARIMA((1,4),1,1) looks somewhat better.

# order_trend=arma_order_select_ic(data['ts1'].dropna())  # because of the heteroscedasticity this may not give the best result
# print(order_trend['bic_min_order'])

# Sparse-coefficient approach in Python: after comparing ARIMA(4,1,0) with
# ARIMA(4,1,1), AIC and BIC favour (4,1,0).
# NOTE(review): the tests above run on data['ts1'] while the model is built
# on data['fertility'] - confirm these are the intended columns.
result_trend = ARIMA(data['fertility'],
                     order=(4, 1, 0),
                     enforce_stationarity=False)
# Pin the lag-2 and lag-3 AR coefficients to zero so that only lags 1 and 4
# are estimated; the fit has to happen inside this context.
with result_trend.fix_params({'ar.L2': 0, 'ar.L3': 0}):
    # From here on `result_trend` is the fitted result, not the model.
    result_trend = result_trend.fit()
    print(result_trend.param_names)
    print(result_trend.forecast())
示例#10
0
# One-step ARIMA forecast of the 'Low' price on one day of 1-minute bars.
# `google` is defined earlier in the script.
df = google.history(
    period='1d',
    interval="1m",
)

# Work on a copy: adding a column to a bare `df[['Low']]` selection writes
# into a slice of `df` and triggers pandas' chained-assignment warning.
low_df = df[['Low']].copy()
low_df['date'] = pd.to_datetime(low_df.index).time
low_df.set_index('date', inplace=True)

X = df.index.values
y = df['Low'].values

# Hold out the last 10% of the bars.  Keep at least one: with offset == 0
# the slice `[:-0]` would leave the training set empty.
offset = max(1, int(0.1 * len(df)))

X_train = X[:-offset]
y_train = y[:-offset]
X_test = X[-offset:]
# The targets come from `y`; slicing `X` here put timestamps into y_test
# and broke the '%f' print below.
y_test = y[-offset:]

plt.plot(range(len(y_train)), y_train, label='Train')
plt.show()

model = ARIMA(y_train, order=(5, 0, 1)).fit()

forecast = model.forecast(steps=1)[0]

print('Real data for time 0: %f' % y_train[-1])
print('Real data for time 1: %f' % y_test[0])
print('pred data for time 1: %f' % forecast)
# Original author's verdict on the result: this is terrible
示例#11
0
class ARIMAModel(ModelStrategy):
    '''
    A class for an Autoregressive Integrated Moving Average Model and the standard operations on it
    '''
    def __init__(self, hparams, log_dir=None):
        '''
        Reads the ARIMA hyperparameters and initialises the base strategy
        :param hparams: Dict of hyperparameters. 'AUTO_PARAMS' switches on the automatic order search
                        (treated as off when absent); 'P', 'D' and 'Q' default to 30, 0 and 0
        :param log_dir: Forwarded to the base class constructor
        '''
        univariate = True
        model = None
        name = 'ARIMA'
        # Read like the other hyperparameters so a missing key does not raise.
        self.auto_params = hparams.get('AUTO_PARAMS', False)
        self.p = int(hparams.get('P', 30))
        self.d = int(hparams.get('D', 0))
        self.q = int(hparams.get('Q', 0))
        super().__init__(model, univariate, name, log_dir=log_dir)

    def fit(self, dataset):
        '''
        Fits an ARIMA forecasting model
        Note: the columns of the passed DataFrame are renamed in place ('Date' -> 'ds', 'Consumption' -> 'y').
        :param dataset: A Pandas DataFrame with 2 columns: Date and Consumption
        '''
        if dataset.shape[1] != 2:
            raise Exception(
                'Univariate models cannot fit with datasets with more than 1 feature.'
            )
        dataset.rename(columns={
            'Date': 'ds',
            'Consumption': 'y'
        },
                       inplace=True)
        series = dataset.set_index('ds')
        if self.auto_params:
            # Search bounds are twice the configured p, d and q.
            best_model = pmdarima.auto_arima(series,
                                             seasonal=False,
                                             stationary=False,
                                             information_criterion='aic',
                                             max_order=2 * (self.p + self.q),
                                             max_p=2 * self.p,
                                             max_d=2 * self.d,
                                             max_q=2 * self.q,
                                             error_action='ignore')
            order = best_model.order
            print("Best ARIMA params: (p, d, q):", best_model.order)
        else:
            order = (self.p, self.d, self.q)
        self.model = ARIMA(series, order=order).fit()
        print(self.model.summary())

    def evaluate(self, train_set, test_set, save_dir=None, plot=False):
        '''
        Evaluates performance of ARIMA model on test set
        Note: the columns of both passed DataFrames are renamed in place ('Date' -> 'ds', 'Consumption' -> 'y').
        :param train_set: A Pandas DataFrame with 2 columns: Date and Consumption
        :param test_set: A Pandas DataFrame with 2 columns: Date and Consumption
        :param save_dir: Directory in which to save forecast metrics
        :param plot: Flag indicating whether to plot the forecast evaluation
        :return: The result of evaluate_forecast() on the combined train/test frame
        '''
        # Imported here so the class does not rely on a module-level pandas import.
        import pandas as pd

        train_set.rename(columns={
            'Date': 'ds',
            'Consumption': 'y'
        },
                         inplace=True)
        test_set.rename(columns={
            'Date': 'ds',
            'Consumption': 'y'
        },
                        inplace=True)
        train_set = train_set.set_index('ds')
        test_set = test_set.set_index('ds')
        train_set["model"] = self.model.fittedvalues
        test_set["forecast"] = self.forecast(
            test_set.shape[0])['Consumption'].tolist()

        # DataFrame.append was removed in pandas 2.0; concat stacks the rows the same way.
        df_forecast = pd.concat([train_set,
                                 test_set]).rename(columns={'y': 'gt'})
        test_metrics = self.evaluate_forecast(df_forecast,
                                              save_dir=save_dir,
                                              plot=plot)
        return test_metrics

    def forecast(self, days, recent_data=None):
        '''
        Create a forecast for the given number of days following the data the model was fitted on.
        :param days: Number of days into the future to produce a forecast for
        :param recent_data: Unused by this model
        :return: A DataFrame with columns Date and Consumption
        '''
        forecast_df = self.model.forecast(steps=days).reset_index(level=0)
        forecast_df.columns = ['Date', 'Consumption']
        return forecast_df

    def save(self, save_dir, scaler_dir=None):
        '''
        Saves the model to disk. Does nothing if no model has been fitted or loaded.
        :param save_dir: Directory in which to save the model
        :param scaler_dir: Unused by this model
        '''
        if self.model is not None:
            model_path = os.path.join(save_dir,
                                      self.name + self.train_date + '.pkl')
            self.model.save(model_path)  # Serialize and save the model object

    def load(self, model_path, scaler_path=None):
        '''
        Loads the model from disk
        NOTE(review): the file is loaded through pickle; only load model files from trusted sources.
        :param model_path: Path to saved model; must end in ".pkl"
        :param scaler_path: Unused by this model
        '''
        if os.path.splitext(model_path)[1] != '.pkl':
            raise Exception('Model file path for ' + self.name +
                            ' must have ".pkl" extension.')
        self.model = ARIMAResults.load(model_path)
# invert differenced value
def inverse_difference(history, yhat, interval=1):
    """Undo a lag-``interval`` difference for one forecast value.

    Adds ``yhat`` to the element ``interval`` positions from the end of
    ``history`` and returns the sum.
    """
    reference = history[-interval]
    return yhat + reference


# Forecast the close price on a lag-168 differenced series, then map the
# forecasts back to price level.  `btc`, `difference`, `order` and
# `fcast_out` are defined earlier in the script.
from pandas import DataFrame

# Difference the series at a lag of 168 observations.
X = btc['close'].astype(float)
duration = 168
differenced = difference(X, duration)

# Fit once and forecast `fcast_out` steps past the end of the sample.
# NOTE(review): `fit(disp=0)` and taking `[0]` of the forecast as the whole
# forecast array look like the legacy statsmodels ARIMA API - confirm which
# ARIMA class is imported before upgrading statsmodels.
model = ARIMA(differenced, order=order).fit(disp=0)
fcast = model.forecast(steps=fcast_out)[0]

# Start from the observed prices; each inverted forecast is appended so that
# later steps can refer back to it once the horizon exceeds `duration`.
predict = list(X)
hour = 1
for yhat in fcast:
    inverted = inverse_difference(predict, yhat, duration)
    predict.append(inverted)
    hour += 1

# Keep only the newly forecast tail.
fcast2 = DataFrame(predict[-fcast_out:])