示例#1
0
def statistic(prediction, target):
    """Print MSE/RMSE/MAPE for *prediction* vs *target*, plot both, return the index.

    Uses the module-level ``model`` for the plot title.
    """
    eI = EvaluationIndex.evalueationIndex(prediction, target)
    summary = "MSE={}\nRMSE={}\nMAPE={}".format(eI.MSE, eI.RMSE, eI.MAPE)
    print(summary)
    plt.plot(target, 'o-', label='true_data')
    plt.plot(prediction, 'x-', label='prediction')
    plt.legend()
    title = 'result of {}\nRMSE{:.3f}  MAPE{:.3f}'.format(
        model.name, eI.RMSE, eI.MAPE)
    plt.title(title)
    plt.show()
    # Optional extra diagnostics, kept disabled:
    # eI.plot_e()
    # eI.plot_ape()
    # eI.correlation()
    return eI
示例#2
0
# Example: in-sample ARIMA forecasting of the sunspot series with pyflux.
import numpy as np
import pyflux as pf
import matplotlib.pyplot as plt
import EvaluationIndex
from sunspot.loader import DataPreprocess

# Hyperparameters: AR/MA orders and the path to the monthly sunspot CSV.
ar = 9
ma = 10
datapath = '../dataset/sunspot_ms.csv'

DataLoader = DataPreprocess()
# arma_load_data returns the full preprocessed series plus a train/test split.
data, (train_data, test_data) = DataLoader.arma_load_data(datapath)

# Fit an ARIMA model with Gaussian innovations and produce rolling
# in-sample predictions over the test horizon via maximum likelihood.
model = pf.ARIMA(data=data, ar=ar, ma=ma, family=pf.Normal())
prediction = np.squeeze(
    np.array(
        model.predict_is(h=len(test_data), fit_once=True, fit_method='MLE')))

# Undo the loader's preprocessing so metrics are in the original scale.
# NOTE(review): recover()'s exact inverse transform lives in sunspot.loader.
prediction = DataLoader.recover(prediction)
test_data = DataLoader.recover(test_data)
eI = EvaluationIndex.evalueationIndex(prediction, test_data)
print('MSE={}\nRMSE={}'.format(eI.MSE, eI.RMSE))
plt.plot(test_data, label='true_data')
plt.plot(prediction, label='predictions')
plt.title('the result of arma\nRMSE={:.2f}'.format(eI.RMSE))
plt.legend()
plt.show()
eI.plot_ape()
eI.correlation()
示例#3
0
# load data
# NOTE(review): datapath, input_dim and kernel are defined elsewhere in this
# file; this chunk assumes they are in scope.
print(">load data...")
DataLoader = DataPreprocess()
x_train, y_train, x_test, y_test = DataLoader.svm_load_data(datapath,
                                                            seq_len=input_dim,
                                                            row=1686 -
                                                            input_dim - 1)

# train an epsilon-SVR; a LinearSVR variant is kept below for reference
print(">train model...")
svr = SVR(kernel=kernel, epsilon=0.0001, C=5000)
svr.fit(x_train, y_train)
'''
svr = LinearSVR(epsilon=0.0, C=1.0)
svr.fit(x_train, y_train)
'''

# predict on the test split and map results back to the original scale
print(">predict...")
y_predict = svr.predict(x_test)
y_predict = DataLoader.recover(y_predict)
y_test = DataLoader.recover(y_test)

# report error metrics and plot prediction vs ground truth
eI = EvaluationIndex.evalueationIndex(y_predict, y_test)
print('MSE={}\nRMSE={}'.format(eI.MSE, eI.RMSE))
plt.plot(y_test, label='true_data')
plt.plot(y_predict, label='predict_data')
plt.title('the result of svr\nRMSE={:.2f}'.format(eI.RMSE))
plt.legend()
plt.show()
示例#4
0
def sp500_svr_多次参数测试():
    """Grid-search SVR hyperparameters on the GDP series.

    For each window length ``seq_len`` in [2, 5] and each penalty ``C`` in a
    log-spaced list, fits a sigmoid-kernel SVR on the first 22 windows and
    writes ``seq_len, C, 1, MSE, RMSE`` rows to temp-test-print-out.txt.
    """
    print('> Loading data... ')
    data_src = EvaluationIndex.loadCsvData_Np("GDP_1981-2016.csv")

    # Global series min/max, used for min-max normalisation below.
    t_min = np.min(data_src)
    t_max = np.max(data_src)

    # `with` guarantees the results file is closed even if fitting raises
    # (the original opened it manually and only closed on the happy path).
    with open("temp-test-print-out.txt", 'w') as f:
        for seq_len in range(2, 6):
            sequence_length = seq_len + 1
            # Slice the series into overlapping windows of seq_len inputs + 1 target.
            result = []
            for index in range(len(data_src) - sequence_length):
                result.append(np.array(data_src[index: index + sequence_length]).ravel())
            result = np.array(result)

            # Fixed split: first 22 windows train, the rest test.
            row = 22
            train = result[:int(row), :]
            x_train = train[:, :-1]
            y_train = train[:, -1]
            x_test = result[int(row):, :-1]
            y_test = result[int(row):, -1]

            # Min-max normalise with the global series min/max.
            x_train_normal = (x_train - t_min) / (t_max - t_min)
            y_train_normal = ((y_train - t_min) / (t_max - t_min)).ravel()
            x_test_normal = (x_test - t_min) / (t_max - t_min)
            y_test_normal = ((y_test - t_min) / (t_max - t_min)).ravel()
            print('> Data Loaded. Compiling...')

            for j in [0.0001, 0.001, 0.01, 0.1, 1, 10, 100, 1000]:
                svr_rbf = SVR(kernel='sigmoid', C=j)
                svr_rbf.fit(x_train_normal, y_train_normal)
                y_rbf = svr_rbf.predict(x_test_normal)
                eI = EvaluationIndex.evalueationIndex(y_rbf, y_test_normal)
                # The constant 1 is a placeholder for the disabled degree sweep.
                print(seq_len, ",", j, ",", 1, ",", eI.MSE, ",", eI.RMSE, file=f)

    return
示例#5
0
def sp500_svr_simple_test_每个维归一化():
    """Per-window normalisation experiment on the sunspot series.

    Each sliding window is normalised independently to [-1, 1], an RBF SVR is
    fit on the normalised windows, and metrics are reported three ways:
    normalised, de-normalised, and with normalised inputs but raw labels.
    """
    import time
    global_start_time = time.time()
    print('> Loading data... ')
    seq_len = 5

    data_src = EvaluationIndex.loadCsvData_Np("SN_m_tot_V2.0_1990.1-2017.8.csv")
    # plt.plot(data_src, label='data')
    # plt.legend()
    # plt.show()

    # Used later to map predictions back to the original scale.
    t_mean=np.mean(data_src)
    t_min=np.min(data_src)
    t_max=np.max(data_src)

    # Data preprocessing
    sequence_length = seq_len + 1
    result = []
    # Slice the series into overlapping windows (seq_len inputs + 1 target)
    for index in range(len(data_src) - sequence_length):
        result.append(np.array(data_src[index: index + sequence_length]).ravel())
    # print(result)
    result = np.array(result)

    # Raw (un-normalised) 90/10 split; y_test is reused for the recovered metrics.
    row = round(0.9 * result.shape[0])
    train = result[:int(row), :]
    # np.random.shuffle(train)
    x_train = train[:, :-1]
    y_train = train[:, -1]
    x_test = result[int(row):, :-1]
    y_test = result[int(row):, -1]


    # Normalise each window independently to [-1, 1].
    data_normalization = EvaluationIndex.归一化()
    normalised_data = []
    for window in result:
        normalised_window = data_normalization.normalization_max_min_负1_1(window)
        # normalised_window = [p /  window[0] - 1 for p in window]
        # print(normalised_window)
        normalised_data.append(normalised_window.ravel())

    # print(np.array(normalised_data))
    # print(normalised_data)
    normalised_data = np.array(normalised_data)
    row = round(0.9 * normalised_data.shape[0])
    train = normalised_data[:int(row), :]
    # np.random.shuffle(train)
    x_train_normal = train[:, :-1]
    y_train_normal = train[:, -1]
    x_test_normal = normalised_data[int(row):, :-1]
    y_test_normal = normalised_data[int(row):, -1]

    # plt.plot(y_train_normal, label='data')
    # plt.legend()
    # plt.show()
    # Flatten targets to 1-D for the SVR fit/evaluation.
    y_train_normal =  y_train_normal.ravel()
    y_test_normal = y_test_normal.ravel()
    print('> Data Loaded. Compiling...')
    ###############################################################################
    # Fit regression model
    # svr_rbf = SVR(kernel='rbf', C=1e3, gamma=0.1)
    svr_rbf = SVR(kernel='rbf', C=1e3, gamma=5)
    svr_rbf.fit(x_train_normal, y_train_normal)
    # y_rbf = svr_rbf.predict(x_test_normal)
    y_rbf = predict_point_by_point(svr_rbf, x_test_normal)

    # y_rbf = predict_sequence_full(svr_rbf, x_test_normal, 5) # max 5

    # y_rbf = predict_sequences_multiple(svr_rbf, x_test_normal, 5, seq_len) # max 5
    print('Training duration (s) : ', time.time() - global_start_time)
    ###############################################################################
    # look at the results
    # plot_results_point(y_rbf, y_test_normal)
    # plot_results_full(y_rbf, y_test_normal)
    # plot_results_multiple(y_rbf, y_test_normal, seq_len)
    eI = EvaluationIndex.evalueationIndex(y_rbf, y_test_normal)
    eI.show()
    # De-normalise and compare against raw targets.
    # NOTE(review): windows were normalised per-window above, but the inverse
    # here uses the *global* span (t_max - t_min) plus t_mean — confirm this
    # matches 归一化.normalization_max_min_负1_1 before trusting these metrics.
    print("所有数据归一化,结果还原对比")
    t = (t_max-t_min)
    t = np.array(y_rbf)*t
    y_rbf_back = t + t_mean
    # plot_results_point(y_rbf_back, y_test)
    # plot_results_full(y_rbf_back, y_test)
    # plot_results_multiple(y_rbf_back, y_test,seq_len)
    eI = EvaluationIndex.evalueationIndex(y_rbf_back.ravel(), y_test)
    eI.show()

    # Third variant: normalised inputs but un-normalised labels.
    print("测试数据为归一化,lable为未归一化")
    svr_rbf.fit(x_train_normal, y_train)
    y_rbf = predict_point_by_point(svr_rbf, x_test_normal)
    ###############################################################################
    eI = EvaluationIndex.evalueationIndex(y_rbf, y_test)
    eI.show()
示例#6
0
def sp500_svr_simple_test_所有一起归一化2():
    """Walk-forward single-step SVR forecast of the GDP series.

    All windows are min-max normalised with the global series min/max; then
    for every test index the model is refit on the preceding ``row`` windows
    and predicts exactly one step ahead. Prints the predictions, shows the
    evaluation index, and plots predicted vs true values.
    """
    print('> Loading data... ')

    data_src = EvaluationIndex.loadCsvData_Np("GDP_1981-2016.csv")

    # Global series min/max used for normalisation.
    t_min = np.min(data_src)
    t_max = np.max(data_src)

    # Slice the series into overlapping windows: seq_len inputs + 1 target.
    seq_len = 4
    sequence_length = seq_len + 1
    result = []
    for index in range(len(data_src) - sequence_length):
        result.append(np.array(data_src[index: index + sequence_length]).ravel())
    result = np.array(result)

    row = 22  # fixed training-window size for the walk-forward loop
    result_len = len(result)
    x_result = result[:, :-1]
    y_result = result[:, -1]

    # Min-max normalise everything with the global min/max.
    x_result = (x_result - t_min) / (t_max - t_min)
    y_result = (y_result - t_min) / (t_max - t_min)

    # Walk-forward: train on the `row` windows before y_i, predict window y_i.
    y_rbf_all = []
    y_test_all = []
    for y_i in range(row, result_len):
        x_train = x_result[y_i - row:y_i]
        y_train = y_result[y_i - row:y_i].ravel()
        x_test = x_result[y_i:y_i + 1]
        y_test = y_result[y_i:y_i + 1].ravel()

        svr_rbf = SVR(kernel='rbf', C=10000000, gamma=0.1)
        svr_rbf.fit(x_train, y_train)
        y_rbf = predict_point_by_point(svr_rbf, x_test)
        y_rbf_all.append(y_rbf)
        y_test_all.append(y_test)

    y_rbf_all = np.array(y_rbf_all)
    y_test_all = np.array(y_test_all)

    print(y_rbf_all)
    print(y_test_all)

    eI = EvaluationIndex.evalueationIndex(y_rbf_all, y_test_all)
    eI.show()
    plot_results_point(y_rbf_all, y_test_all)

    return
示例#7
0
def sp500_svr_simple_test_所有一起归一化():
    """Globally min-max normalise the GDP series, fit one RBF SVR, and plot.

    NOTE(review): this function returns immediately after printing `result`,
    so everything below the early `return` is currently dead code — it looks
    like leftover debugging; confirm before relying on this function.
    """
    import time
    global_start_time = time.time()
    print('> Loading data... ')

    data_src = EvaluationIndex.loadCsvData_Np("GDP_1981-2016.csv")
    # plt.plot(data_src, label='data')
    # plt.legend()
    # plt.show()

    # Used later to map predictions back to the original scale.
    t_mean=np.mean(data_src)
    t_min=np.min(data_src)
    t_max=np.max(data_src)


    # Data preprocessing
    seq_len = 4
    sequence_length = seq_len + 1
    result = []
    # Slice the series into overlapping windows (seq_len inputs + 1 target)
    for index in range(len(data_src) - sequence_length):
        result.append(np.array(data_src[index: index + sequence_length]).ravel())
    # print(result)
    result = np.array(result)

    # NOTE(review): debug print + early return — nothing below ever runs.
    print(result)
    return

    row = round(0.9 * result.shape[0])
    row = 22
    train = result[:int(row), :]
    # np.random.shuffle(train)
    x_train = train[:, :-1]
    y_train = train[:, -1]
    x_test = result[int(row):, :-1]
    y_test = result[int(row):, -1]

    print("train data")
    print(x_train, y_train)
    print("train data")
    print(x_test, y_test)

    # Min-max normalise with the global series min/max.
    # data_normalization = EvaluationIndex.归一化.normalization_max_min_负1_1(data_src)
    x_train_normal = (x_train - t_min) / (t_max - t_min)
    y_train_normal = (y_train - t_min) / (t_max - t_min)
    x_test_normal = (x_test - t_min) / (t_max - t_min)
    y_test_normal = (y_test - t_min) / (t_max - t_min)
    # plt.plot(y_train_normal, label='data')
    # plt.legend()
    # plt.show()
    # Flatten targets to 1-D for the SVR fit/evaluation.
    y_train_normal =  y_train_normal.ravel()
    y_test_normal = y_test_normal.ravel()
    print('> Data Loaded. Compiling...')
    ###############################################################################
    # Fit regression model
    svr_rbf = SVR(kernel='rbf', C=1e3, gamma=0.05)

    print("所有数据归一化,结果不还原")
    svr_rbf.fit(x_train_normal, y_train_normal)

    # y_rbf = svr_rbf.predict(x_test_normal)
    y_rbf = predict_point_by_point(svr_rbf, x_test_normal)

    # NOTE(review): the *2 scaling of y_rbf looks like a leftover experiment —
    # verify it is intentional before reading anything into these metrics.
    eI = EvaluationIndex.evalueationIndex(y_rbf*2, y_test_normal)
    eI.show()
    print(y_rbf, y_test_normal)
    plot_results_point(y_rbf*2, y_test_normal)
示例#8
0
    # Train the network and report the wall-clock training time.
    # NOTE(review): nb_epoch is the Keras 1.x spelling of epochs — confirm the
    # installed Keras version accepts it.
    hist = model.fit(x_train,
                     y_train,
                     batch_size=batchsize,
                     nb_epoch=epochs,
                     shuffle=True,
                     validation_split=0)
    print('Training duration (s) : ', time.time() - global_start_time)
    plot_train(hist.history)

    # Predict on the training set to eyeball for underfitting.
    predictions = predict.predict_point_by_point(model, x_train)
    plt.plot(predictions, label='predict')
    plt.plot(y_train, label='true_data')
    plt.show()

    # Predict on the test set, undo preprocessing, and compute the metrics.
    print("> predict...")
    predictions = predict.predict_point_by_point(model, x_test)
    predictions = DataLoader.recover(predictions)
    y_test = DataLoader.recover(y_test)
    eI = EvaluationIndex.evalueationIndex(predictions, y_test)
    print("MSE={}\nRMSE={}\nMAPE={}".format(eI.MSE, eI.RMSE, eI.MAPE))
    predict.plot_results_point(predictions, y_test, eI.RMSE)
    eI.plot_ae()
    eI.plot_e()
    eI.plot_ape()
    eI.correlation()

    # Persist the trained model; the path embeds the key hyperparameters + RMSE.
    print("> Train finished. save model...")
    model.save(
        model_path.format(model_name, input_dim, epochs, timesteps, eI.RMSE))