Пример #1
0
    def core_model(self):
        """Run a walk-forward ARIMA(5, 1, 0) evaluation over ``self.X_test``.

        For every test observation the model is refitted on ``self.X_train``,
        a one-step forecast is recorded, and the true observation is then
        appended to ``self.X_train``. The training data therefore grows in
        place and contains every test value once this method returns.

        Side effects:
            * ``self.mse`` and ``self.mae`` are set from ``self.get_mse`` and
              ``self.get_mae``.
            * The model fitted in the last iteration is saved to
              ``arima_model.pkl`` in the current working directory.
            * Both metrics are printed.

        Raises:
            ValueError: if ``self.X_test`` is empty, because no model would be
                fitted and there would be nothing to score or save.
        """
        # Without this guard an empty test set leaves ``model`` unbound and the
        # save below fails with an unrelated-looking error.
        if len(self.X_test) == 0:
            raise ValueError("X_test is empty; nothing to forecast.")

        predicted = []
        model = None
        for actual in self.X_test:
            # Rebuild the model every iteration on everything seen so far.
            model = ARIMA(self.X_train, order=(5, 1, 0)).fit(disp=0)

            # Keep the first element of the forecast() result for this step.
            # NOTE(review): in the legacy statsmodels API this is presumably
            # the array of point forecasts - confirm for the installed version.
            predicted.append(model.forecast()[0])

            # Reveal the true value to the next iteration's fit.
            self.X_train.append(actual)

        self.mse = self.get_mse(self.X_test, predicted)
        self.mae = self.get_mae(self.X_test, predicted)

        # Persist the model from the final iteration.
        model.save('arima_model.pkl')

        print("Mean Squared error = {:.4f}".format(self.mse))
        print("Mean Absolute error = {:.4f}".format(self.mae))
Пример #2
0
# Build the TABL network from the template and the regularizer/constraint
# settings, then print its layer summary.
model = Models.TABL(template, dropout, projection_regularizer,
                    projection_constraint, attention_regularizer,
                    attention_constraint)
model.summary()

# Per-class weights handed to fit(): classes 0 and 2 get 1e6 / 300, class 1
# gets 1e6 / 400.
class_weight = {0: 1e6 / 300.0, 1: 1e6 / 400.0, 2: 1e6 / 300.0}

# Training. The labels skip their first two rows (remove .iloc[2:] for single
# day).
model.fit(X_train,
          lbls_train.iloc[2:],
          batch_size=256,
          epochs=100,
          class_weight=class_weight)

# Persist the trained network.
model.save('model.h5')

# Predictions on the training inputs.
pred = model.predict(X_train)

# Stack train and test rows, then keep the last len(stock_test) + n_past rows
# so the first test window has n_past rows of history available.
total = pd.concat((stock_train, stock_test), axis=0)
inputs = total[len(total) - len(stock_test) - n_past:].values
# Apply the already-fitted scaler to those rows.
inputs = sc_predict.transform(inputs)
# Pad with n_past + (n_future - 1) all-zero rows, one column per column of
# `stock`.
add = np.zeros((n_past + (n_future - 1), stock.shape[1]))
inputs = np.vstack((inputs, add))

# Slide a window of n_past consecutive rows over `inputs`; each window is one
# test sample.
X_test = []
for i in range(n_past, len(inputs) - n_future + 1):
    X_test.append(inputs[i - n_past:i, 0:stock.shape[1]])

# Stack the windows into a single array.
X_test = np.array(X_test)
最小的p值和q值为:p = 0, q = 0
'''


# Step 4 -- (C drive) -- model validation
# After a model is chosen, check whether its residual series is white noise.
# If it is not, the residuals still carry useful information and the model
# has to be changed or more information extracted.
# If the residuals are not white noise, pick new p, q values and try again.

while 1:
    # Take the (p, q) position of the smallest remaining entry in `matrix`.
    p, q = matrix.stack().idxmin()
    print('最小的p值和q值为:p = %s, q = %s' % (p, q))

    # Number of lags passed to the Ljung-Box test below.
    lagnum = 12
    arima = ARIMA(xdata2, (p, 1, q)).fit()
    arima.save('arima_BIC.pkl')  # save the model

    # In-sample predictions requested with typ='levels', and their difference
    # from xdata2 with NaN entries dropped.
    xdata_pred = arima.predict(typ='levels')
    pred_error = (xdata_pred - xdata2).dropna()
    print('pred_error:\n', pred_error)

    # White-noise test
    lbx, px = acorr_ljungbox(pred_error, lags=lagnum)
    print('pred_error的p值为:', px)
    # h counts how many of the returned p-values are below 0.05.
    h = (px < 0.05).sum()
    print('h=', h)
    if h > 0:
        print('模型ARIMA(%s,1, %s)不符合白噪音检验' % (p, q))
        print('在BIC矩阵中去掉[%s,%s]组合,重新进行计算' % (p, q))
        # Blank out this (p, q) cell so the next idxmin() selects another one,
        # and keep a reference to the rejected fit.
        matrix.iloc[p, q] = np.nan
        arimafail = arima
Пример #4
0
                    validation_data=(X_test, y_test))

# + colab={"base_uri": "https://localhost:8080/", "height": 291} colab_type="code" id="slgKH-bqcWwe" outputId="e918d0d2-117f-43cf-f50b-e8b5503bcd98"
# Check for overfitting, which shows up as val_loss starting to rise while
# loss keeps decreasing or stays constant.

# Plot the 'loss' and 'val_loss' series recorded in `history` on one chart.
plt.plot(history.history['loss'], label='loss')
plt.plot(history.history['val_loss'], label='val_loss')
plt.title('Learning curve')
plt.ylabel('Loss')
plt.xlabel('Epochs')
plt.legend()
plt.show()

# + colab={} colab_type="code" id="6IvVquwzKqQD"
# Persist the trained model.
model.save('model_id_revenue_prediction.h5')

# + colab={"base_uri": "https://localhost:8080/", "height": 429} colab_type="code" id="2t24z4WzOSG2" outputId="0282cce6-ef89-417d-8de6-19a40c62a74d"
# Get a prediction from our model for our data and plot it against the truth

y_pred = model.predict(X)

# Flatten the predictions into one column indexed like X; the bare name on the
# next line is the cell's displayed value.
df_pred = pd.DataFrame(index=X.index, data={'predictions': y_pred.ravel()})
df_pred
# -

# Bare expression: shows the type of y_pred as the cell output.
type(y_pred)

# + colab={"base_uri": "https://localhost:8080/", "height": 479} colab_type="code" id="ppUviuuZO-oQ" outputId="2b858b5a-b3e5-4dad-f358-391e48803eb4"
# Start a 15x8 figure and draw y with circle markers, labelled 'Truth'.
fig, ax = plt.subplots(figsize=(15, 8))
ax.plot(y, label='Truth', marker='o')
Пример #5
0
class Model:
    """ARIMA model plus JSON meta data, both persisted in one folder.

    The folder holds two files whose names come from ``consts_model``: the
    saved ARIMA results and a JSON document with the last training date and
    the training values. When both files are present at construction time the
    instance starts out fitted; otherwise it is unfitted until ``train`` runs.
    """

    def __init__(self, model_folder_abs_path: str):
        """Create the folder if needed and load a previously saved model.

        Args:
            model_folder_abs_path: folder that holds (or will hold) the model
                file and the meta data file.
        """
        self._model_folder_abs_path: str = model_folder_abs_path
        # Unfitted until both files are found below or train() succeeds.
        self._arima_model: ARIMAResults = None
        self._meta_data = None

        # exist_ok avoids the check-then-create race of exists() + makedirs().
        os.makedirs(self._model_folder_abs_path, exist_ok=True)

        model_file_abs_path = self._get_model_abs_file_path()
        model_meta_data_file_abs_path = (
            self._get_model_meta_data_abs_file_path())
        if os.path.exists(model_file_abs_path) and os.path.exists(
                model_meta_data_file_abs_path):
            # NOTE(review): statsmodels results are presumably restored via
            # pickle - only point this class at folders you trust.
            self._arima_model = ARIMAResults.load(model_file_abs_path)
            with open(model_meta_data_file_abs_path,
                      encoding="utf-8") as file_meta_data:
                self._meta_data = json.load(file_meta_data)

    def _get_model_abs_file_path(self):
        """Return the path of the saved-model file inside the model folder."""
        return os.path.join(self._model_folder_abs_path,
                            consts_model.MODEL_FILE_NAME)

    def _get_model_meta_data_abs_file_path(self):
        """Return the path of the meta data file inside the model folder."""
        return os.path.join(self._model_folder_abs_path,
                            consts_model.META_DATA_FILE_NAME)

    @staticmethod
    def _extract_predictions(test_results: list) -> list:
        """Return the predicted entry of every record produced by ``test``."""
        return [
            test_result[consts_model.TEST_COLUMN_KEY_PREDICTED]
            for test_result in test_results
        ]

    def train(self, train_data: pd.DataFrame):
        """Fit the ARIMA model on ``train_data`` and build its meta data.

        Args:
            train_data: frame whose index maximum is stored (formatted with
                ``consts_model.DEFAULT_DATETIME_FORMAT``) as the training end
                date.

        Raises:
            Exception: if this instance already holds a model.
        """
        if self._arima_model is not None:
            raise Exception("This directory contains a trained model.")

        list_train_data = train_pre_processor.convert_to_model_data(train_data)

        fitted_model = ARIMA(
            list_train_data, order=config_model.ORDER_FOR_MODEL).fit(disp=0)
        meta_data = {
            consts_model.META_DATA_KEY_DATA_END_DATE:
            train_data.index.max().strftime(
                consts_model.DEFAULT_DATETIME_FORMAT),
            consts_model.META_DATA_KEY_DATA_TRAIN_DATA_SET:
            [label[0] for label in list_train_data]
        }

        # Publish only after everything succeeded, so a failed fit does not
        # leave an unfitted ARIMA object behind that looks like a trained one.
        self._arima_model = fitted_model
        self._meta_data = meta_data

    def save(self):
        """Write the fitted model and its meta data into the model folder.

        Raises:
            Exception: if no model has been fitted or loaded yet.
        """
        if self._arima_model is None:
            raise Exception("This model has not been fitted.")

        self._arima_model.save(self._get_model_abs_file_path())
        with open(self._get_model_meta_data_abs_file_path(),
                  "w",
                  encoding="utf-8") as file_meta_data:
            json.dump(self._meta_data, file_meta_data)

    def test(self, train_data: pd.DataFrame, test_data: pd.DataFrame):
        """Walk forward over ``test_data`` with one-step forecasts.

        The first forecast comes from the model held by this instance. Before
        each later forecast a new model is fitted on the converted
        ``train_data`` extended with the test values consumed so far.

        Args:
            train_data: frame converted with
                ``train_pre_processor.convert_to_model_data`` to seed refits.
            test_data: frame whose first column supplies the expected values
                and whose index supplies the dates.

        Returns:
            A list with one dict per test row, keyed by the
            ``consts_model.TEST_COLUMN_KEY_*`` constants (date, predicted,
            expected).
        """
        list_train_data = train_pre_processor.convert_to_model_data(train_data)
        model_for_test: ARIMAResults = self._arima_model
        np_test_data = test_data.values

        test_results = []
        for position, (test_date, test_row) in enumerate(
                zip(test_data.index, np_test_data)):
            if position:
                # Refit only when another forecast is actually needed; this
                # skips the unused fit after the final test row.
                model_for_test = ARIMA(
                    list_train_data,
                    order=config_model.ORDER_FOR_MODEL).fit(disp=0)

            predicted_value = model_for_test.forecast()[0]
            expected_value = test_row[0]
            test_results.append({
                consts_model.TEST_COLUMN_KEY_DATE:
                test_date,
                consts_model.TEST_COLUMN_KEY_PREDICTED:
                predicted_value,
                consts_model.TEST_COLUMN_KEY_EXPECTED:
                expected_value
            })

            # train() indexes every training row with [0] and predict()
            # rebuilds rows as np.array([value]); wrap the scalar the same way
            # so the list does not become ragged.
            # NOTE(review): assumes convert_to_model_data yields one-element
            # rows - confirm against train_pre_processor.
            list_train_data.append(np.array([expected_value]))

        return test_results

    def get_mean_squared_error(self, test_data: pd.DataFrame,
                               test_results: list):
        """Return the mean squared error of ``test_results``.

        Args:
            test_data: frame holding the expected values.
            test_results: records returned by ``test``.
        """
        return mean_squared_error(test_data.values,
                                  self._extract_predictions(test_results))

    def plot_results(self,
                     test_data: pd.DataFrame,
                     test_results: list,
                     color="red"):
        """Plot expected values and predictions on one chart and show it.

        Args:
            test_data: frame holding the expected values.
            test_results: records returned by ``test``.
            color: line color used for the predictions.
        """
        pyplot.plot(test_data.values)
        pyplot.plot(self._extract_predictions(test_results), color=color)
        pyplot.show()

    def predict(self, on_date_str: str):
        """Forecast the value for a date after the end of the training data.

        One step is forecast per day returned by ``utils_dates.diff_days``;
        each forecast is appended to the training values before the next
        model is fitted.

        Args:
            on_date_str: target date formatted with
                ``consts_model.DEFAULT_DATETIME_FORMAT``.

        Returns:
            The first element of the last one-step forecast.

        Raises:
            Exception: if the model is unfitted, if the date is not after the
                stored training end date, or if no forecast step was run.
        """
        if self._arima_model is None:
            raise Exception("This model has not been fitted.")

        on_date = datetime.strptime(on_date_str,
                                    consts_model.DEFAULT_DATETIME_FORMAT)
        model_train_end_date = datetime.strptime(
            self._meta_data[consts_model.META_DATA_KEY_DATA_END_DATE],
            consts_model.DEFAULT_DATETIME_FORMAT)
        if not utils_dates.is_first_date_bigger(on_date, model_train_end_date):
            raise Exception(
                "Please provide a date in the future. This model is not used to predict for the data in which it was trained to."
            )

        difference_dates = utils_dates.diff_days(on_date, model_train_end_date)
        # Rebuild the training rows from the JSON-friendly meta data copy.
        model_train_data: list = [
            np.array([label]) for label in self._meta_data[
                consts_model.META_DATA_KEY_DATA_TRAIN_DATA_SET]
        ]
        model_for_predict = self._arima_model
        last_prediction = None

        for step in range(difference_dates):
            if step:
                # Refit on the history extended with the earlier forecasts;
                # the fit after the final step is never used, so skip it.
                model_for_predict = ARIMA(
                    model_train_data,
                    order=config_model.ORDER_FOR_MODEL).fit(disp=0)

            last_prediction = model_for_predict.forecast()[0]
            model_train_data.append(last_prediction)

        if last_prediction is None:
            # Zero steps would otherwise surface as a TypeError on None[0].
            raise Exception(
                "No forecast step was produced for the requested date.")

        return last_prediction[0]
Пример #6
0
        # Fit ARIMA(p, 1, q) on D_data and record its BIC; when the fit fails
        # a None placeholder is stored instead.
        # NOTE(review): the bare `except:` below also swallows
        # KeyboardInterrupt/SystemExit - consider narrowing it.
        try:
            value = ARIMA(D_data, (p, 1, q)).fit().bic
            temp.append(value)
        except:
            temp.append(None)
        bic_matrix.append(temp)

bic_matrix = pd.DataFrame(bic_matrix)  # convert to a DataFrame
p, q = bic_matrix.stack().idxmin()  # flatten with stack(), then idxmin() locates the smallest value

print(u'BIC 最小的p值 和 q 值:%s,%s' % (p, q))  # original author's recorded output: p = 0, q = 1
# so the ARIMA model can now be built
model = ARIMA(data, (p, 1, q)).fit()
model.summary2()
# save the model
model.save('model.pkl')
# load the model
from statsmodels.tsa.arima_model import ARIMAResults

loaded = ARIMAResults.load('model.pkl')
# forecast the next five units
predictions = loaded.forecast(5)
# the forecast values:
pre_result = predictions[0]
print(u'预测结果为:', pre_result)
# the standard errors:
error = predictions[1]
print(u'标准误差为:', error)
# the confidence intervals:
confidence = predictions[2]
print(u'置信区间为:', confidence)
Пример #7
0
#Fetch and split data
raw_x, raw_y = data_handler.fetch_data()
train_x, test_x = data_handler.split_data(raw_x)
train_y, test_y = data_handler.split_data(raw_y)

#Difference the whole dataset to remove seasonality
history = np.append(train_x, test_x)
differenced = data_handler.remove_seasonality(history, lag=order[0]).values
#Run mode taken from the first command-line argument ('train' or 'mape').
#The previous `sys.argv[0:3]` slice produced a list, which never compares
#equal to a mode string, so no branch below could ever run. An empty string
#is used when no argument is given so that no mode is selected.
train = sys.argv[1] if len(sys.argv) > 1 else ''

# Mode dispatch: 'train' fits and saves one model on the training part of the
# differenced series; 'mape' refits for every output sequence of the test set.
if (train == 'train'):
    #Declare model
    model = ARIMA(differenced[:len(train_x)], order=order)
    #Fit model and save parameters
    model = model.fit(disp=1)
    model.save(path_to_model)
elif (train == 'mape'):
    mape_sum = 0
    mape_scores = list()
    #Iterates through the test set, predicting and calculating MAPE
    for seq in range(len(test_y) // output_seq_length):
        #Start with the first test week
        #We append the training set since our values will depend on those values
        #After each sequence is evaluated, its actual values become part of the training data
        model = ARIMA(differenced[:len(train_x) +
                                  (seq + 1) * output_seq_length],
                      order=order)
        #Fit model to previous values
        model = model.fit()
        #Forecast output_seq_length steps and keep the first element of the result
        preds = model.forecast(output_seq_length)[0]
        #We convert the np.float64 to integers due to the weird behaviour of matplotlib
                activation='tanh',
                input_shape=(1, 1),
                return_sequences=False))
    else:
        print('wrong option')
    # Finish the network: dropout with rate 0.8, a single-unit dense layer and
    # a LeakyReLU activation on top.
    model.add(Dropout(0.8))
    model.add(Dense(1))
    model.add(LeakyReLU())
    # Mean squared error loss with the Adam optimizer.
    model.compile(loss='mse', optimizer='adam')
    # Train for 10 epochs in batches of 10, validating on the test split.
    model.fit(trainX,
              trainY,
              epochs=10,
              batch_size=10,
              validation_data=(testX, testY),
              verbose=1)
    model.save('./savedModel')

    # Predict on both splits.
    trainHat = model.predict(trainX)
    testHat = model.predict(testX)

    # Undo the scaling on predictions and targets before scoring.
    trainHat = scaler.inverse_transform(trainHat)
    trainY = scaler.inverse_transform(trainY)
    testHat = scaler.inverse_transform(testHat)
    testY = scaler.inverse_transform(testY)

    # RMSE over the first column of each split.
    trainScore = math.sqrt(mean_squared_error(trainY[:, 0], trainHat[:, 0]))
    print('Train: %.2f RMSE' % (trainScore))
    testScore = math.sqrt(mean_squared_error(testY[:, 0], testHat[:, 0]))
Пример #9
0
                                    max_ma=7,
                                    ic=['aic', 'bic', 'hqic'])
# Take the (p, q) pair stored in the selection result's bic_min_order.
p, q = order.bic_min_order
print("p,q")
print(p, q)

# Build the ARIMA(p, 1, q) model (the original note recorded ARIMA(0, 1, 1)).
# `order` is rebound here from the selection result to the order tuple.
order = (p, 1, q)
train_X = diff_1_df[:]
arima_model = ARIMA(train_X, order).fit()

# Model report
# print(arima_model.summary2())

# Save the model
# NOTE(review): the file name ends in .h5 but is written by the results
# object's save(); presumably not an HDF5 file - confirm before renaming.
arima_model.save('./data/arima_model.h5')

# Load the model back
arima_model = ARIMAResults.load('./data/arima_model.h5')

# Predict the next two days (the two positions right after the training data)
predict_data_02 = arima_model.predict(start=len(train_X),
                                      end=len(train_X) + 1,
                                      dynamic=False)

# Predict over the historical data
predict_data = arima_model.predict(dynamic=False)

# Invert the log-differencing
# original_series = np.exp(train_X.values[1:] + np.log(dau.values[1:-1]))
# predict_series = np.exp(predict_data.values + np.log(dau.values[1:-1]))