def statistic(prediction, target):
    """Report MSE/RMSE/MAPE for *prediction* vs *target* and plot both series.

    NOTE(review): reads the module-level ``model`` for the plot title —
    callers must have ``model`` defined before invoking this.

    Returns the EvaluationIndex instance so callers can reuse the metrics.
    """
    metrics = EvaluationIndex.evalueationIndex(prediction, target)
    print("MSE={}\nRMSE={}\nMAPE={}".format(metrics.MSE, metrics.RMSE, metrics.MAPE))

    plt.plot(target, 'o-', label='true_data')
    plt.plot(prediction, 'x-', label='prediction')
    plt.legend()
    plt.title('result of {}\nRMSE{:.3f} MAPE{:.3f}'.format(
        model.name, metrics.RMSE, metrics.MAPE))
    plt.show()

    # Optional extra diagnostics, disabled by default:
    # metrics.plot_e()
    # metrics.plot_ape()
    # metrics.correlation()
    return metrics
import numpy as np
import pyflux as pf
import matplotlib.pyplot as plt
import EvaluationIndex
from sunspot.loader import DataPreprocess

# Hyperparameters: autoregressive / moving-average orders of the ARIMA model.
ar = 9
ma = 10
datapath = '../dataset/sunspot_ms.csv'

# Load the sunspot series; arma_load_data also returns a train/test split.
DataLoader = DataPreprocess()
data, (train_data, test_data) = DataLoader.arma_load_data(datapath)

model = pf.ARIMA(data=data, ar=ar, ma=ma, family=pf.Normal())

# In-sample rolling prediction over the test horizon, fitted once via MLE.
prediction = np.squeeze(
    np.array(
        model.predict_is(h=len(test_data), fit_once=True, fit_method='MLE')))

# Undo the loader's preprocessing before computing metrics on the raw scale.
prediction = DataLoader.recover(prediction)
test_data = DataLoader.recover(test_data)

# NOTE(review): "evalueationIndex" is the (misspelled) project class name.
eI = EvaluationIndex.evalueationIndex(prediction, test_data)
print('MSE={}\nRMSE={}'.format(eI.MSE, eI.RMSE))

plt.plot(test_data, label='true_data')
plt.plot(prediction, label='predictions')
plt.title('the result of arma\nRMSE={:.2f}'.format(eI.RMSE))
plt.legend()
plt.show()

eI.plot_ape()
eI.correlation()
# load data print(">load data...") DataLoader = DataPreprocess() x_train, y_train, x_test, y_test = DataLoader.svm_load_data(datapath, seq_len=input_dim, row=1686 - input_dim - 1) # train print(">train model...") svr = SVR(kernel=kernel, epsilon=0.0001, C=5000) svr.fit(x_train, y_train) ''' svr = LinearSVR(epsilon=0.0, C=1.0) svr.fit(x_train, y_train) ''' # predict print(">predict...") y_predict = svr.predict(x_test) y_predict = DataLoader.recover(y_predict) y_test = DataLoader.recover(y_test) eI = EvaluationIndex.evalueationIndex(y_predict, y_test) print('MSE={}\nRMSE={}'.format(eI.MSE, eI.RMSE)) plt.plot(y_test, label='true_data') plt.plot(y_predict, label='predict_data') plt.title('the result of svr\nRMSE={:.2f}'.format(eI.RMSE)) plt.legend() plt.show()
def sp500_svr_多次参数测试():
    """Grid-search sigmoid-SVR hyper-parameters on the GDP series.

    For each window length ``seq_len`` in 2..5 and each penalty ``C`` in a
    log-spaced list, fits an SVR on min-max-normalised sliding windows and
    appends ``seq_len, C, 1, MSE, RMSE`` rows to ``temp-test-print-out.txt``.

    Fix: the output file was opened with a bare ``open``/``close`` pair and
    leaked on any exception; it is now managed by a ``with`` block (opened
    after the data loads so a load failure no longer truncates the file).
    """
    print('> Loading data... ')
    data_src = EvaluationIndex.loadCsvData_Np("GDP_1981-2016.csv")

    # Global min/max used for min-max normalisation of every split.
    t_min = np.min(data_src)
    t_max = np.max(data_src)

    with open("temp-test-print-out.txt", 'w') as f:
        for seq_len in range(2, 6):
            # Slice the series into windows: seq_len inputs + 1 target.
            sequence_length = seq_len + 1
            result = np.array(
                [np.array(data_src[index: index + sequence_length]).ravel()
                 for index in range(len(data_src) - sequence_length)])

            row = 22  # fixed train/test split point
            train = result[:row, :]
            x_train = train[:, :-1]
            y_train = train[:, -1]
            x_test = result[row:, :-1]
            y_test = result[row:, -1]

            # Min-max normalisation with the global statistics.
            x_train_normal = (x_train - t_min) / (t_max - t_min)
            y_train_normal = ((y_train - t_min) / (t_max - t_min)).ravel()
            x_test_normal = (x_test - t_min) / (t_max - t_min)
            y_test_normal = ((y_test - t_min) / (t_max - t_min)).ravel()

            print('> Data Loaded. Compiling...')

            for j in [0.0001, 0.001, 0.01, 0.1, 1, 10, 100, 1000]:
                svr_rbf = SVR(kernel='sigmoid', C=j)
                svr_rbf.fit(x_train_normal, y_train_normal)
                y_rbf = svr_rbf.predict(x_test_normal)
                eI = EvaluationIndex.evalueationIndex(y_rbf, y_test_normal)
                # CSV-ish log row: seq_len, C, degree(placeholder), MSE, RMSE
                print(seq_len, ",", j, ",", 1, ",", eI.MSE, ",", eI.RMSE,
                      file=f)
    return
def sp500_svr_simple_test_每个维归一化():
    """SVR sunspot experiment with per-window min-max normalisation.

    Runs three comparisons on the same RBF-SVR:
      1. train/evaluate entirely on per-window-normalised data;
      2. map the normalised predictions back to the raw scale and compare
         against the raw targets;
      3. train on normalised inputs against the *raw* (un-normalised) labels.
    """
    import time
    start_time = time.time()
    print('> Loading data... ')

    seq_len = 5
    data_src = EvaluationIndex.loadCsvData_Np("SN_m_tot_V2.0_1990.1-2017.8.csv")

    # Global statistics, used later to map predictions back to the raw scale.
    t_mean = np.mean(data_src)
    t_min = np.min(data_src)
    t_max = np.max(data_src)

    # Slice the series into overlapping windows: seq_len inputs + 1 target.
    window_size = seq_len + 1
    windows = np.array(
        [np.array(data_src[i: i + window_size]).ravel()
         for i in range(len(data_src) - window_size)])

    # Raw (un-normalised) targets, kept for comparisons 2 and 3.
    split = round(0.9 * windows.shape[0])
    y_train = windows[:int(split), -1]
    y_test = windows[int(split):, -1]

    # Normalise every window independently to [-1, 1].
    normaliser = EvaluationIndex.归一化()
    normalised = np.array(
        [normaliser.normalization_max_min_负1_1(w).ravel() for w in windows])

    split = round(0.9 * normalised.shape[0])
    x_train_normal = normalised[:int(split), :-1]
    y_train_normal = normalised[:int(split), -1].ravel()
    x_test_normal = normalised[int(split):, :-1]
    y_test_normal = normalised[int(split):, -1].ravel()

    print('> Data Loaded. Compiling...')

    # 1) Fit and evaluate entirely on normalised data.
    svr_rbf = SVR(kernel='rbf', C=1e3, gamma=5)
    svr_rbf.fit(x_train_normal, y_train_normal)
    y_rbf = predict_point_by_point(svr_rbf, x_test_normal)
    print('Training duration (s) : ', time.time() - start_time)

    eI = EvaluationIndex.evalueationIndex(y_rbf, y_test_normal)
    eI.show()

    # 2) Recover predictions to the raw scale and compare with raw targets.
    print("所有数据归一化,结果还原对比")
    y_rbf_back = np.array(y_rbf) * (t_max - t_min) + t_mean
    eI = EvaluationIndex.evalueationIndex(y_rbf_back.ravel(), y_test)
    eI.show()

    # 3) Normalised inputs, raw labels.
    print("测试数据为归一化,lable为未归一化")
    svr_rbf.fit(x_train_normal, y_train)
    y_rbf = predict_point_by_point(svr_rbf, x_test_normal)
    eI = EvaluationIndex.evalueationIndex(y_rbf, y_test)
    eI.show()
def sp500_svr_simple_test_所有一起归一化2():
    """Walk-forward single-step SVR forecast on the GDP series.

    The whole series is min-max normalised with global statistics; for every
    forecast point the SVR is re-fitted on the most recent ``row`` windows
    and predicts exactly one step ahead.
    """
    import time
    start_time = time.time()
    print('> Loading data... ')
    data_src = EvaluationIndex.loadCsvData_Np("GDP_1981-2016.csv")

    # Global statistics for min-max normalisation.
    t_min = np.min(data_src)
    t_max = np.max(data_src)

    # Sliding windows: seq_len inputs followed by one target value.
    seq_len = 4
    window_size = seq_len + 1
    windows = np.array(
        [np.array(data_src[i: i + window_size]).ravel()
         for i in range(len(data_src) - window_size)])

    row = 22  # training-window length for each refit
    n_windows = len(windows)

    x_all = (windows[:, :-1] - t_min) / (t_max - t_min)
    y_all = (windows[:, -1] - t_min) / (t_max - t_min)

    predictions = []
    truths = []
    for end in range(row, n_windows):
        # Refit on the most recent `row` windows, then predict the next point.
        fit_x = x_all[end - row:end]
        fit_y = y_all[end - row:end].ravel()
        probe_x = x_all[end:end + 1]
        probe_y = y_all[end:end + 1].ravel()

        svr_rbf = SVR(kernel='rbf', C=10000000, gamma=0.1)
        svr_rbf.fit(fit_x, fit_y)
        predictions.append(predict_point_by_point(svr_rbf, probe_x))
        truths.append(probe_y)

    predictions = np.array(predictions)
    truths = np.array(truths)
    print(predictions)
    print(truths)

    eI = EvaluationIndex.evalueationIndex(predictions, truths)
    eI.show()
    plot_results_point(predictions, truths)
    return
def sp500_svr_simple_test_所有一起归一化():
    """SVR GDP experiment: the whole dataset is normalised with the global
    min/max and results are evaluated on the normalised scale (no recovery).

    Fixes:
      * a leftover debug ``print(result); return`` made everything after the
        windowing step unreachable — removed so the function actually trains;
      * predictions were doubled (``y_rbf * 2``) before evaluation and
        plotting — the stray factor is removed;
      * the header above the test split printed "train data" — now "test data".
    """
    print('> Loading data... ')
    data_src = EvaluationIndex.loadCsvData_Np("GDP_1981-2016.csv")

    # Global min/max used to normalise every split.
    t_min = np.min(data_src)
    t_max = np.max(data_src)

    # Sliding windows: seq_len inputs + 1 target.
    seq_len = 4
    sequence_length = seq_len + 1
    result = np.array(
        [np.array(data_src[index: index + sequence_length]).ravel()
         for index in range(len(data_src) - sequence_length)])

    row = 22  # fixed train/test split point
    train = result[:row, :]
    x_train = train[:, :-1]
    y_train = train[:, -1]
    x_test = result[row:, :-1]
    y_test = result[row:, -1]

    print("train data")
    print(x_train, y_train)
    print("test data")
    print(x_test, y_test)

    # Min-max normalisation with the global statistics.
    x_train_normal = (x_train - t_min) / (t_max - t_min)
    y_train_normal = ((y_train - t_min) / (t_max - t_min)).ravel()
    x_test_normal = (x_test - t_min) / (t_max - t_min)
    y_test_normal = ((y_test - t_min) / (t_max - t_min)).ravel()

    print('> Data Loaded. Compiling...')

    # Fit the RBF-SVR and predict point by point on the normalised scale.
    svr_rbf = SVR(kernel='rbf', C=1e3, gamma=0.05)
    print("所有数据归一化,结果不还原")
    svr_rbf.fit(x_train_normal, y_train_normal)
    y_rbf = predict_point_by_point(svr_rbf, x_test_normal)

    eI = EvaluationIndex.evalueationIndex(y_rbf, y_test_normal)
    eI.show()
    print(y_rbf, y_test_normal)
    plot_results_point(y_rbf, y_test_normal)
# Train the network (no validation split; data shuffled each epoch).
# NOTE(review): `model`, `batchsize`, `epochs`, `global_start_time`, etc.
# are expected to be defined earlier at module level.
hist = model.fit(x_train, y_train,
                 batch_size=batchsize, nb_epoch=epochs,
                 shuffle=True, validation_split=0)
print('Training duration (s) : ', time.time() - global_start_time)
plot_train(hist.history)

# Predict on the training set to visually check for under-fitting.
predictions = predict.predict_point_by_point(model, x_train)
plt.plot(predictions, label='predict')
plt.plot(y_train, label='true_data')
plt.show()

# Predict on the test set, recover the raw scale, and compute metrics.
print("> predict...")
predictions = predict.predict_point_by_point(model, x_test)
predictions = DataLoader.recover(predictions)
y_test = DataLoader.recover(y_test)

eI = EvaluationIndex.evalueationIndex(predictions, y_test)
print("MSE={}\nRMSE={}\nMAPE={}".format(eI.MSE, eI.RMSE, eI.MAPE))
predict.plot_results_point(predictions, y_test, eI.RMSE)
eI.plot_ae()
eI.plot_e()
eI.plot_ape()
eI.correlation()

# Persist the trained model; the filename encodes the run configuration.
print("> Train finished. save model...")
model.save(
    model_path.format(model_name, input_dim, epochs, timesteps, eI.RMSE))