Пример #1
0
def predict_wavelet(data_array):
    """Return the one-step-ahead forecast of a price series.

    The series is decomposed with a level-2 'db4' wavelet transform, an ARMA
    model (order chosen by AIC) is fitted to each coefficient band, the bands
    are predicted, and the predicted bands are recombined into a series whose
    last element is returned.

    Parameters
    ----------
    data_array : numpy array
        Stock price series.

    Returns
    -------
    ans : float
        One-step-ahead predicted value.
    """
    bands = pywt.wavedec(data_array, 'db4', mode='sym', level=2)
    # Extra out-of-sample steps requested from each band's model; only the
    # last (finest detail) band is extended by one coefficient.
    extra_steps = (0, 0, 1)

    predicted_bands = []
    for band, extra in zip(bands, extra_steps):
        # AIC picks the ARMA order of this band.
        band_order = sm.tsa.arma_order_select_ic(band, ic='aic')['aic_min_order']
        band_model = ARMA(band, order=band_order)
        band_fit = band_model.fit()
        predicted_bands.append(
            band_model.predict(band_fit.params, 1, len(band) + extra))

    # Rebuild the series from the predicted coefficients; its tail is the forecast.
    return pywt.waverec(predicted_bands, 'db4')[-1]
Пример #2
0
def sm_arma(file='weather.npy', p=3, q=3, n=30):
    """
    Build an ARMA model with statsmodels and predict n future values.

    The series stored in ``file`` is first-differenced, every order (i, j)
    with 1 <= i <= p and 1 <= j <= q is fitted by maximum likelihood, and the
    fit with the lowest AIC is used to predict the in-sample range plus
    ``n`` further steps. Data and prediction are plotted on an hourly index.

    Parameters:
        file (str): data file
        p (int): maximum order of autoregressive model
        q (int): maximum order of moving average model
        n (int): number of values to predict

    Return:
        aic (float): aic of optimal model
    """
    # Work on the first difference of the stored series.
    z = np.diff(np.load(file))
    l = len(z)

    data = pd.DataFrame(z,
                        index=pd.date_range(start='04-13-2019t19:56',
                                            periods=l,
                                            freq="1h"),
                        columns=["weather"])

    # Grid search: keep the fitted model with the smallest AIC so it does not
    # have to be estimated a second time afterwards.
    min_aic = np.inf
    bestp = 0
    bestq = 0
    best_fit = None
    for i in range(1, p + 1):
        for j in range(1, q + 1):
            candidate = ARMA(z, order=(i, j)).fit(method='mle', trend='c')
            if candidate.aic < min_aic:
                min_aic = candidate.aic
                bestp = i
                bestq = j
                best_fit = candidate

    if best_fit is None:
        # No candidate was accepted (empty grid or no finite AIC): fall back
        # to the (bestp, bestq) == (0, 0) model, as the search defaults imply.
        best_fit = ARMA(z, order=(bestp, bestq)).fit(method='mle', trend='c')

    # Predict indices 0 .. l + n inclusive, i.e. l + n + 1 values. The horizon
    # follows ``n`` so it always matches the plotting index built below.
    pred = best_fit.predict(start=0, end=(l + n))

    # Plot the data against the best model's predictions.
    plt.figure(figsize=(12, 8))
    pred_index = pd.date_range(start='04-13-2019t19:56',
                               periods=(l + n + 1),
                               freq="1h")
    pred_df = pd.DataFrame(pred, index=pred_index, columns=["weather"])
    data["weather"].plot(label="Data")
    pred_df["weather"].plot(label="Predicted")
    plt.title("Stats ARMA(" + str(bestp) + "," + str(bestq) + ")")
    plt.ylabel("Change in Temperature")
    plt.xlabel("Dates")
    plt.legend()
    plt.show()

    return min_aic
Пример #3
0
def preTest(data, size, pre_size=1):
    """Fit wavelet/ARMA models on the first ``size`` closes and print a reconstruction.

    ``data`` must provide 'closeIndex' and 'tradeDate' columns. ``pre_size``
    placeholder rows (closeIndex 0) are appended so that the decomposition of
    the extended series tells how many coefficients each band has to predict.
    Nothing is returned; band lengths and slices of the result are printed.
    """
    fre_size = pre_size - size

    # Training window: the first ``size`` observations.
    index_list = np.array(data['closeIndex'])[:size]
    date_list1 = np.array(data['tradeDate'])[:size]  # kept for parity; not used below

    # Extend the frame by ``pre_size`` placeholder rows.
    predata = data
    for _ in range(pre_size):
        predata = predata.append({'closeIndex': 0}, ignore_index=True)
    index_for_predict = np.array(predata['closeIndex'])[fre_size:]
    date_list2 = np.array(predata['tradeDate'])[fre_size:]  # kept for parity; not used below

    # Level-2 decomposition of the training window: approximation + two detail bands.
    A2, D2, D1 = pywt.wavedec(index_list, 'db4', mode='sym', level=2)
    train_bands = [A2, D2, D1]

    # ARMA order per band by AIC, then one model and one fit per band.
    # NOTE: fitting with the AIC-selected order can fail for some windows;
    # the original author suggests adjusting the data length in that case.
    band_orders = [
        sm.tsa.arma_order_select_ic(band, ic='aic')['aic_min_order']
        for band in train_bands
    ]
    band_models = [
        ARMA(band, order=band_order)
        for band, band_order in zip(train_bands, band_orders)
    ]
    band_fits = [band_model.fit() for band_model in band_models]

    # Decomposing the extended series gives the target length of every band.
    A2_all, D2_all, D1_all = pywt.wavedec(np.array(predata['closeIndex']),
                                          'db4', mode='sym', level=2)
    full_bands = [A2_all, D2_all, D1_all]

    # Predict in-sample coefficients plus the missing out-of-sample ones.
    predicted_bands = []
    for band_model, band_fit, band, full_band in zip(band_models, band_fits,
                                                     train_bands, full_bands):
        steps_ahead = len(full_band) - len(band)
        predicted_bands.append(
            band_model.predict(params=band_fit.params,
                               start=1,
                               end=len(band) + steps_ahead))
    for predicted in predicted_bands:
        print(len(predicted))

    # Reconstruct the series from the predicted coefficients.
    denoised_index = pywt.waverec(predicted_bands, 'db4')

    print(denoised_index[10:])
    print(index_for_predict[10:])
    print("*********************************************************************")
Пример #4
0
def getWaves(a,b):
    """Return ARMA in-sample predictions of the wavelet bands of a price window.

    Rows ``a`` .. ``b`` (inclusive) of column 2 of the module-level ``df`` are
    decomposed with a level-4 'db4' transform. For every coefficient band an
    ARMA model with AIC-selected order is fitted to that same band and
    predicted over indices 1 .. len(band).

    Returns
    -------
    list
        Predicted coefficient arrays in ``pywt.wavedec`` order:
        [approximation, D4, D3, D2, D1].
    """
    price = np.array(df.iloc[a:b+1, 2])
    # Wavelet decomposition: one approximation band followed by four detail bands.
    bands = pywt.wavedec(price, 'db4', mode='sym', level=4)

    coffnew = []
    for band in bands:
        # Order selection, model and prediction all use the same band.
        # (Previously the D4 and D3 models were built on D1 and D2 data while
        # their order and prediction length came from D4 and D3.)
        order = sm.tsa.arma_order_select_ic(band, ic='aic')['aic_min_order']
        model = ARMA(band, order=order)
        result = model.fit()
        coffnew.append(
            model.predict(params=result.params, start=1, end=len(band)))
    return coffnew
def arma_preds(df2, p_max=5, q_max=5, start_date=-10, end_date=5):
    """Grid-search an ARMA(p, q) model by AIC and return yearly predictions.

    Parameters
    ----------
    df2 : DataFrame
        Input data; passed through ``index_date_convert`` and cast to float.
    p_max, q_max : int
        Largest AR and MA orders tried (both searches start at 1).
    start_date, end_date : int
        Year offsets relative to the current year; predictions run from
        January 1st of ``today.year + start_date`` to January 1st of
        ``today.year + end_date``.

    Returns
    -------
    DataFrame
        Single column 'preds', indexed by year.

    Raises
    ------
    IndexError
        If no (p, q) candidate could be fitted with a positive AIC.
    """
    df2 = index_date_convert(df2)

    for col in df2.columns:
        df2[col] = df2[col].astype(float)

    # Grid search over (p, q); remember the best fit so it is not re-estimated.
    best_aic = None
    best_fit = None
    for p in range(1, p_max + 1):
        for q in range(1, q_max + 1):
            try:
                candidate = ARMA(df2, (p, q)).fit()
            except Exception:
                # Orders that cannot be estimated are skipped, but
                # KeyboardInterrupt / SystemExit are no longer swallowed.
                continue
            aic = candidate.aic
            # Only positive AIC values are accepted; ties keep the first order found.
            if aic > 0 and (best_fit is None or aic < best_aic):
                best_aic = aic
                best_fit = candidate

    if best_fit is None:
        # Same exception type as the former ``lst[0]`` lookup, with a usable message.
        raise IndexError(
            "no ARMA(p, q) model with positive AIC could be fitted")

    # Prediction window expressed as January 1st of the offset years.
    current_year = date.today().year
    from_date = str(current_year + start_date) + '-01-01'
    to_date = str(current_year + end_date) + '-01-01'

    preds = best_fit.predict(from_date, to_date)
    preds_df = pd.DataFrame(preds, columns=['preds'])
    preds_df.index = preds_df.index.year

    return preds_df
Пример #6
0
def ARMA_mse_predictions(arma_order,
                         channel,
                         training_set=None,
                         test_set=None,
                         training_ixs=None,
                         step_ahead=60):
    """
    Return the per-window MSE of ARMA predictions (Exercise 1).

    For every (training, test, start index) triple an ARMA model of order
    ``arma_order`` is fitted with method 'css' to column ``channel`` of the
    training data, ``step_ahead`` values are predicted from the start index,
    and the mean squared error against the first ``step_ahead`` test values
    is collected.
    """
    order_is_pair = isinstance(arma_order,
                               (tuple, list)) and len(arma_order) == 2
    assert order_is_pair, "arma_oder must be a len 2 list."
    # NOTE(review): the check accepts channel 6 although the message lists 1-5.
    channel_is_valid = isinstance(channel, int) and 0 < channel <= 6
    assert channel_is_valid, "channel should be an int 1,2,3,4,5."

    training_set, test_set, training_ixs = _get_stairs_ar_prediction_data(
        training_set, test_set, training_ixs)
    window_count = len(training_set)
    assert len(
        test_set
    ) == window_count, "training set and test set of different lengths."
    assert len(
        training_ixs
    ) == window_count, "training set and training_ixs of different lengths."

    def window_mse(train_frame, test_frame, first_ix):
        # Fit on the training channel, predict step_ahead values from first_ix.
        fitted = ARMA(train_frame[channel].values,
                      order=arma_order).fit(method='css')
        predicted = fitted.predict(start=first_ix,
                                   end=first_ix + step_ahead - 1)
        observed = test_frame[channel].values[:step_ahead]
        return np.mean((observed - predicted)**2)

    return [
        window_mse(train_frame, test_frame, first_ix)
        for train_frame, test_frame, first_ix in zip(training_set, test_set,
                                                     training_ixs)
    ]
Пример #7
0
def draw_ma(ts, w):
    """Plot MA(w) and AR(w) in-sample predictions of ``ts`` and save them as a PDF.

    The figure is written to ``test_ma_<w>.pdf``; the MA predictions are returned.
    """
    ma_fit = ARMA(ts, order=(0, w)).fit(disp=-1)
    ma_prediction = ma_fit.predict()

    ar_fit = ARMA(ts, order=(w, 0)).fit(disp=-1)
    ar_prediction = ar_fit.predict()

    plt.clf()
    # AR curve is drawn first, then MA.
    for curve, tag in ((ar_prediction, "AR"), (ma_prediction, "MA")):
        plt.plot(curve, label=tag)
    plt.legend(loc="best")
    plt.title("MA Test %s" % w)
    pdf_name = "test_ma_" + str(w) + ".pdf"
    plt.savefig(pdf_name, format='pdf')

    return ma_prediction
Пример #8
0
def run_arma(original_ts, maxar=7, maxma=7):
    """Fit an ARMA model to the log of a series and return fit and forecast.

    The log series is differenced when ``test_stationarity`` on its first
    column returns 0.01 or more; the number of differences comes from
    ``best_diff``. The ARMA order is chosen by ``choose_order`` and the model
    is estimated with method 'css'.

    Parameters
    ----------
    original_ts : DataFrame
        Series to model; only the first column is used for fitting.
    maxar, maxma : int
        Upper bounds handed to ``choose_order``.

    Returns
    -------
    tuple
        ``(predict, forecast)``: the in-sample prediction and the 3-step
        forecast, both passed through ``predict_recover``.
    """
    print(original_ts.columns[0], 'start arma')
    original_ts_log = np.log(original_ts)
    # ``.iloc[:, 0]`` selects the first column by position; the former ``.ix``
    # indexer is no longer available in current pandas releases.
    if test_stationarity(original_ts_log.iloc[:, 0]) < 0.01:
        diffn = 0
        diff_original_ts_log = original_ts_log
        print('平稳,不需要差分')
    else:
        diffn = best_diff(original_ts_log, maxdiff=8)
        diff_original_ts_log = produce_diffed_timeseries(original_ts_log, diffn)
        print('差分滞后阶数为'+str(diffn)+',已完成差分')

    order = choose_order(diff_original_ts_log, maxar, maxma)
    print('模型的阶数为: ' + str(order))
    # NOTE(review): ``disp`` is passed as the string '-1' rather than an int — confirm intent.
    model = ARMA(diff_original_ts_log.iloc[:, 0], order).fit(disp='-1', method='css')

    # First element of the forecast result: the 3 forecast values.
    f = model.forecast(steps=3, alpha=0.05)[0]

    p = model.predict()
    predict = predict_recover(p, original_ts_log, diffn, 'predict')
    forecast = predict_recover(f, original_ts_log, diffn, 'forecast')

    return predict, forecast
Пример #9
0
def getPredictWaveCoff(a,b,level):
    """Predict a price window via per-band ARMA models on 'db1' wavelet coefficients.

    Column 2 of the module-level ``df`` is read twice: rows ``a`` .. ``b`` as
    training data and rows ``a`` .. ``b + 28`` as the extended window. The
    difference in band lengths between the two decompositions is the number
    of extra coefficients each band's model is asked to predict.

    Returns
    -------
    list
        ``data_predict[-29:-1]`` of the reconstructed series (28 values).
    """
    data_train = df.iloc[a:b+1, 2]
    data_whole = df.iloc[a:b+29, 2]
    adList = pywt.wavedec(data_train, 'db1', level=level)
    adList_all = pywt.wavedec(data_whole, 'db1', level=level)

    coffnew = []
    for band, band_all in zip(adList, adList_all):
        delta = len(band_all) - len(band)
        order = sm.tsa.arma_order_select_ic(band, ic='aic')['aic_min_order']
        model = ARMA(band, order=order)
        try:
            result = model.fit()
        except Exception:
            # The AIC-selected order could not be estimated: fall back to
            # ARMA(0, 0). Catching Exception (not a bare except) keeps
            # KeyboardInterrupt / SystemExit working.
            print(b+1, 'except')
            model = ARMA(band, order=(0, 0))
            result = model.fit()
        # NOTE(review): ``start=a`` reuses the DataFrame row offset as the
        # prediction start index — confirm this is intended for a != 0.
        p = model.predict(params=result.params, start=a, end=len(band)+delta)
        coffnew.append(p)

    data_predict = list(pywt.waverec(coffnew, 'db1'))
    return data_predict[-29:-1]
Пример #10
0
def history_based_recommend():
    """Render the history-based recommendation page for the logged-in user.

    Monthly usage is read from 'app/main/analysis/data/timeuse.xls', an
    ARMA(10, 1) model is fitted to its first difference, and the 2017
    predictions are accumulated back into usage levels (rounded up) starting
    from the December 2016 value. Aborts with 404 when the user is not found.
    """
    account = User.query.filter_by(username=current_user.username).first()
    if account is None:
        abort(404)
    portrait = Portrait.query.filter_by(user=account).first()

    import pandas as pd
    from math import ceil
    # The sheet's first column (month) becomes the index.
    usage = pd.read_excel('app/main/analysis/data/timeuse.xls', index_col=0)
    last_year_data = list(usage['2016-1-01':]['use'])

    from statsmodels.tsa.arima_model import ARMA
    # First difference (dropping the leading NaN row) to stabilise the series.
    differenced = usage.diff(1)[1:]
    fitted = ARMA(differenced, (10, 1)).fit()
    forecast = fitted.predict('2017-1-01', '2017-12-01', dynamic=True)

    # Undo the differencing: seed with the last observed level, then
    # accumulate step by step, rounding every level up.
    forecast[0] = ceil(forecast[0] + usage['2016-12-01':]['use'])
    for step in range(1, len(forecast)):
        forecast[step] = ceil(forecast[step - 1] + forecast[step])

    return render_template('history_based_recommend.html',
                           data=list(forecast),
                           last_year_data=last_year_data,
                           portrait=portrait,
                           base64=base64)
Пример #11
0
def draw_ar(ts, w):
    """Plot the AR(w) in-sample prediction against ``ts`` and save it as ``test_ar_<w>.pdf``."""
    fitted = ARMA(ts, order=(w, 0)).fit(disp=-1)
    in_sample = fitted.predict()

    plt.clf()
    # Prediction first, original series second.
    for curve, tag in ((in_sample, "PDT"), (ts, "ORG")):
        plt.plot(curve, label=tag)
    plt.legend(loc="best")
    plt.title("AR Test %s" % w)
    pdf_name = "test_ar_" + str(w) + ".pdf"
    plt.savefig(pdf_name, format='pdf')
Пример #12
0
def draw_ar(ts, w):
    """Show the AR(w) in-sample prediction against ``ts`` and return the fitted model.

    The returned results object can be queried further, e.g. ``conf_int()``
    for confidence intervals.
    """
    fitted = ARMA(ts, order=(w, 0)).fit(disp=-1)
    in_sample = fitted.predict()

    plt.clf()
    for curve, tag in ((in_sample, "PDT"), (ts, "ORG")):
        plt.plot(curve, label=tag)
    plt.legend(loc="best")
    plt.title("AR Test %s" % w)
    plt.show()
    return fitted
def draw_ar(ts, w):
    """Show the AR(w) prediction for 2016-2019 next to the 2010-2015 part of ``ts``."""
    fitted = ARMA(ts, order=(w, 0)).fit(disp=-1)
    in_sample = fitted.predict()

    predicted_window = in_sample['2016':'2019']
    observed_window = ts['2010':'2015']

    plt.clf()
    plt.plot(predicted_window, 'r:', label="PDT")
    plt.plot(observed_window, '-', label="ORG")
    plt.legend(loc="best")
    plt.title("AR Test %s" % w)

    plt.show()
Пример #14
0
    def _ARMA(self, data: pd.Series, AR_q: int = 1, MA_p: int = 1):
        """Return the last in-sample ARMA(AR_q, MA_p) prediction for ``data``.

        If fitting raises, the exception and a hint are printed and NaN is
        returned instead.
        """
        try:
            fitted = ARMA(data, order=(AR_q, MA_p)).fit(disp=0)
        except Exception as err:
            print(err)
            print("尝试采用其他滞后阶数")
            return np.nan
        return fitted.predict()[-1]
Пример #15
0
def wavelet_ARMA_model(timeseries, result_length):
    """Predict the last ``result_length`` points of a series with wavelet + ARMA.

    The series is first extended via ``add_predict_term_to_timeseries``.
    Everything except the last ``result_length`` points is decomposed
    (level-2 'db4'), each band gets an AIC-ordered ARMA model, the bands are
    predicted out to the band lengths of the full series, and the
    reconstruction's tail is returned as the 'pre_value' column, indexed by
    the corresponding index labels of the extended series.
    """
    timeseries = add_predict_term_to_timeseries(timeseries, 0.0)

    train_values = np.array(timeseries)[:-result_length]
    target_labels = np.array(timeseries.index)[-result_length:]

    # Decompose the training part into approximation + two detail bands.
    A2, D2, D1 = pywt.wavedec(train_values, 'db4', mode='sym', level=2)
    train_bands = [A2, D2, D1]

    # One AIC-selected order, one model and one fit per band.
    band_orders = [
        sm.tsa.arma_order_select_ic(band, ic='aic')['aic_min_order']
        for band in train_bands
    ]
    band_models = [
        ARMA(band, order=band_order)
        for band, band_order in zip(train_bands, band_orders)
    ]
    band_fits = [band_model.fit() for band_model in band_models]

    # Band lengths of the full series give the number of steps to predict.
    A2_all, D2_all, D1_all = pywt.wavedec(np.array(timeseries), 'db4',
                                          mode='sym', level=2)
    full_bands = [A2_all, D2_all, D1_all]

    predicted_bands = []
    for band_model, band_fit, band, full_band in zip(band_models, band_fits,
                                                     train_bands, full_bands):
        steps_ahead = len(full_band) - len(band)
        predicted_bands.append(
            band_model.predict(params=band_fit.params,
                               start=1,
                               end=len(band) + steps_ahead))

    reconstructed = pywt.waverec(predicted_bands, 'db4')

    tail = {'pre_value': reconstructed[-result_length:]}
    Wavelet_TS = pd.DataFrame(tail, index=target_labels, columns=['pre_value'])

    return Wavelet_TS['pre_value']
Пример #16
0
 def arma_forecast(self, ts,  p, q,):
     """Fit ARMA(p, q) to ``ts``; return the one-step forecast and the model summary.

     Returns a tuple ``(next_ret, summary)`` where ``next_ret`` is the first
     element of ``forecast(1)`` and ``summary`` comes from ``summary2()``.
     """
     fitted = ARMA(ts, order=(p, q)).fit(disp=-1)
     # In-sample prediction is still evaluated, although only the forecast is returned.
     fitted.predict()
     one_step = fitted.forecast(1)
     return one_step[0], fitted.summary2()
Пример #17
0
def Do_ARMA(WIFIAPTag, p, q, Draw=False):
    """Fit ARMA(p, q) to a tag's time series and predict a fixed afternoon window.

    The series comes from ``GetTimeSeries(WIFIAPTag)``; the prediction covers
    '2016-9-14 15:0:0' to '2016-9-14 17:50:0'. With ``Draw == True`` the
    fitted values, the series and the prediction are plotted.
    """
    series = GetTimeSeries(WIFIAPTag)

    from statsmodels.tsa.arima_model import ARMA
    fitted = ARMA(series, (p, q)).fit()
    window_start = '2016-9-14 15:0:0'
    window_end = '2016-9-14 17:50:0'
    Predict = fitted.predict(start=window_start, end=window_end)

    if Draw == True:
        plt.rc('figure', figsize=(12, 8))
        plt.plot(fitted.fittedvalues, 'r')
        plt.plot(series)
        plt.plot(Predict, 'g-')
    return Predict
Пример #18
0
def Do_ARMA(WIFIAPTag, p, q, TrainTime, PredictTime):
    """Fit ARMA(p, q) on the window before ``PredictTime`` and predict that interval.

    The training window is derived from ``PredictTime[0]`` (3 units back for
    the start, and the offset ``timedelta(0, 0, 0, 0, 10, 0)`` back for the
    end); ``TrainTime`` is accepted but not used. The prediction runs from
    ``PredictTime[0]`` to ``PredictTime[1]``.
    """
    full_series = GetTimeSeries(WIFIAPTag)

    prediction_start = PredictTime[0]
    window_begin = prediction_start - timedelta(3)
    window_end = prediction_start - timedelta(0, 0, 0, 0, 10, 0)
    training_series = Get_Part_of_TimeSeries(full_series,
                                             [window_begin, window_end])

    from statsmodels.tsa.arima_model import ARMA
    fitted = ARMA(training_series, (p, q)).fit()
    return fitted.predict(start=str(PredictTime[0]),
                          end=str(PredictTime[1]))
def draw_ma(ts, w):
    """Show the MA(w) prediction for 2016-2019 next to the 2010-2015 part of ``ts``.

    Returns the complete in-sample prediction, not only the plotted slice.
    """
    fitted = ARMA(ts, order=(0, w)).fit(disp=-1)
    in_sample = fitted.predict()

    observed_window = ts['2010':'2015']
    predicted_window = in_sample['2016':'2019']

    plt.clf()
    plt.plot(observed_window, label="ORG")
    plt.plot(predicted_window, ':', label="PDT")
    plt.legend(loc="best")
    plt.title("MA Test %s" % w)
    plt.show()

    return in_sample
Пример #20
0
def fit_model_and_forecast(id_list, config):
    """Fit rolling ARMA(2, 2) models per series ID and store forecasts and model on S3.

    For every ID the training data is read from Parquet, sorted by 'ORDER',
    and the last ``config['len_eval']`` observations are forecast one step at
    a time with an expanding estimation window. The forecast table is written
    as Parquet and the last fitted model is dumped with joblib.

    ``config`` keys read here: 's3_host', 'path_training_data_parquet',
    'len_eval', 'len_series', 'path_forecasts', 'path_models'.
    """
    # Materialise the collection of IDs.
    id_list = list(id_list)

    # S3 handles: s3fs for Parquet I/O, boto for the pickle upload.
    s3 = s3fs.S3FileSystem()
    parquet_opener = s3.open
    boto_connection = boto.connect_s3(host=config['s3_host'])

    for series_id in id_list:
        id_tag = 'ID=' + str(series_id)

        # Locate and load this series' training data, restoring the original order.
        file_path = s3.glob(config['path_training_data_parquet'] + id_tag +
                            '/*.parquet')
        df_data = ParquetFile(file_path, open_with=parquet_opener).to_pandas()
        df_data = df_data.sort_values('ORDER')

        n_eval = config['len_eval']
        n_series = config['len_series']

        # Forecast table: ID, 1-based ORDER, the true DATA values, and FORECAST.
        df_forecasts = pd.DataFrame(np.nan, index=range(0, n_eval),
                                    columns=['FORECAST'])
        df_forecasts.insert(0, 'ID', series_id, allow_duplicates=True)
        df_forecasts.insert(1, 'ORDER', np.arange(1, n_eval + 1))
        held_out = df_data['DATA'][range((n_series - n_eval), n_series)].values
        df_forecasts.insert(2, 'DATA', held_out, allow_duplicates=True)

        # Expanding window: train on everything before ``target``, forecast ``target``.
        for j, target in enumerate(range((n_series - n_eval), n_series)):
            model = ARMA(df_data['DATA'][range(0, target)], (2, 2)).fit(disp=False)
            df_forecasts.at[j, 'FORECAST'] = model.predict(target, target)

        # Persist the forecasts as Parquet.
        path = config['path_forecasts'] + id_tag + '.parquet'
        write(path, df_forecasts, write_index=False, append=False,
              open_with=parquet_opener)

        # Persist the most recently fitted model.
        path = config['path_models'] + id_tag + '.model'
        with s3io.open(path, mode='w', s3_connection=boto_connection) as s3_file:
            joblib.dump(model, s3_file)
Пример #21
0
def Do_ARMA(WIFIAPTag,p,q,Draw = False):
    """Fit ARMA(p, q) on a tag's recent history and predict the ``PredictTime`` interval.

    The training window starts at 2016-09-22 06:00:00 and ends at
    ``PredictTime[0]`` minus the offset ``timedelta(0,0,0,0,10,0)``. With
    ``Draw == True`` the fitted values, the training series and the
    prediction are plotted.

    NOTE(review): ``PredictTime`` is not a parameter; it is read from the
    enclosing (module) scope and must be defined before this is called.
    """
    # ``pd.datetime`` was only an alias of ``datetime.datetime`` and is gone
    # from current pandas releases, so the standard-library class is used.
    from datetime import datetime

    Tag_Time_Series = GetTimeSeries(WIFIAPTag)
    ARMA_Time = [datetime(2016,9,22,6,0,0),PredictTime[0] - timedelta(0,0,0,0,10,0)]
    Tag_Time_Series = Get_Part_of_TimeSeries(Tag_Time_Series,ARMA_Time)
    # ARMA model
    from statsmodels.tsa.arima_model import ARMA
    arma_mod = ARMA(Tag_Time_Series,(p,q)).fit()
    Predict = arma_mod.predict(start=str(PredictTime[0]),end=str(PredictTime[1]))
    if Draw == True:
        plt.rc('figure', figsize=(12, 8))
        plt.plot(arma_mod.fittedvalues,'r')
        plt.plot(Tag_Time_Series)
        plt.plot(Predict,'g-')
    return Predict
Пример #22
0
def draw_ar(ts, w):
    """Plot the AR(w) in-sample prediction of ``ts`` after a Ljung-Box residual check.

    NOTE(review): the residual check reads ``xdata_pred``, ``xdata`` and
    ``lagnum`` from the enclosing scope rather than using this function's
    own prediction — confirm that this is intended.
    """
    fitted = ARMA(ts, order=(w,0)).fit(disp=-1)
    in_sample = fitted.predict()

    # Residuals, keeping only the strictly positive ones.
    residuals = (xdata_pred-xdata).dropna()
    positive_residuals = residuals[residuals>0]
    lb, p_values = acorr_ljungbox(positive_residuals, lags=lagnum)
    # Lags with p < 0.05 are treated as evidence against white noise.
    rejected_lags = (p_values<0.05).sum()
    if rejected_lags > 0:
        verdict = '模型ARIMA(0,1,1)不符合白噪声检验'
    else:
        verdict = '模型ARIMA(0,1,1)符合白噪声检验'
    print(verdict)

    plt.clf()
    for curve, tag in ((in_sample, "PDT"), (ts, "ORG")):
        plt.plot(curve, label=tag)
    plt.legend(loc="best")
    plt.title("AR Test %s" % w)
Пример #23
0
def ARMA_model(datasets, dim, order=(3, 3)):
    """Fit an ARMA model on one subsampled dimension and return the test MSE.

    Training data is every 100th row of ``datasets.train_dl[:, dim]``; test
    data is every 10th row of ``datasets.ori_data[:, dim]``. A fit score
    (1 - residual variance / training variance; closer to 1 is better), the
    prediction length and the comparison table are printed.
    """
    training = datasets.train_dl[::100, dim]
    held_out = datasets.ori_data[::10, dim]

    fitted = ARMA(training, order).fit()
    residuals = fitted.fittedvalues - training
    score = 1 - residuals.var() / training.var()
    print('score is', score)

    # Dynamic prediction starting at index 301, one value per test point.
    predicts = fitted.predict(301, 300 + len(held_out), dynamic=True)
    print(len(predicts))
    mse = mean_squared_error(held_out, predicts)

    comparison = pd.DataFrame()
    comparison['original'] = held_out
    comparison['predict'] = predicts
    comparison.plot()
    print(comparison)
    return mse
class ts_AR:
    """Autoregressive model whose lag order comes from an ADF test.

    Parameters
    ------------
    ts_train: training time series
    error_fun: error function to use (stored, not called by this class)

    Attributes
    ------------
    fittedModel: the fitted model, ``None`` until ``fit`` is called
    """
    def __init__(self, ts_train, error_fun=None):
        self.fittedModel = None
        self.error_fun = error_fun
        self.ts_train = ts_train  # training time series

    def __adf_test(self):
        """Run ``adfuller`` (autolag='AIC') on the training series as a labelled Series."""
        outcome = adfuller(self.ts_train, autolag='AIC')
        # The first four entries of the result are labelled below; entry 4 is
        # a mapping of critical values.
        labels = [
            'Test Statistic', 'p-value', 'Lags Used',
            'Number of Observations Used'
        ]
        report = pd.Series(outcome[0:4], index=labels)
        for level, threshold in outcome[4].items():
            report['Critical Value (%s)' % level] = threshold
        return report

    def fit(self):
        """Fit an AR model whose order is the 'Lags Used' value of the ADF test."""
        lag_order = int(self.__adf_test()['Lags Used'])
        self.fittedModel = ARMA(self.ts_train,
                                order=(lag_order, 0)).fit(disp=-1)

    def get_fittedvalues(self):
        """Return the in-sample predictions rounded to whole numbers."""
        in_sample = self.fittedModel.predict()
        return np.round(in_sample)

    def predict(self, forcast_period=2):
        """Return the next ``forcast_period`` forecasts rounded to whole numbers."""
        point_forecasts = self.fittedModel.forecast(forcast_period)[0]
        return np.round(point_forecasts)
Пример #25
0
def ARMA_plot_predictions(arma_order,
                          channel,
                          training_set=None,
                          test_set=None,
                          training_ixs=None,
                          title=None):
    """
    Plot ARMA predictions per window (Exercise 1).

    For every (training, test, start index) triple one subplot shows the last
    60 training values, the test values and a 60-step prediction of an ARMA
    model of order ``arma_order`` fitted (method 'css') to column ``channel``,
    with a dotted vertical line at the start index.
    """
    order_is_pair = isinstance(arma_order,
                               (tuple, list)) and len(arma_order) == 2
    assert order_is_pair, "arma_oder must be a len 2 list."
    # NOTE(review): the check accepts channel 6 although the message lists 1-5.
    channel_is_valid = isinstance(channel, int) and 0 < channel <= 6
    assert channel_is_valid, "channel should be an int 1,2,3,4,5."

    training_set, test_set, training_ixs = _get_stairs_ar_prediction_data(
        training_set, test_set, training_ixs)
    window_count = len(training_set)
    assert len(
        test_set
    ) == window_count, "training set and test set of different lengths."
    assert len(
        training_ixs
    ) == window_count, "training set and training_ixs of different lengths."

    figure, axes = plt.subplots(*_splt_gridsize(window_count))
    axes = axes.ravel()
    for train_frame, test_frame, first_ix, axis in zip(training_set, test_set,
                                                       training_ixs, axes):
        fitted = ARMA(train_frame[channel].values,
                      order=arma_order).fit(method='css')
        forecast = fitted.predict(start=first_ix, end=first_ix + 59)

        past_positions = np.arange(first_ix - 60, first_ix)
        axis.plot(past_positions, train_frame[channel][-60:])
        axis.plot(np.arange(first_ix, first_ix + 60),
                  test_frame[channel].values)
        axis.plot(np.arange(first_ix, first_ix + 60), forecast)
        axis.axvline(first_ix, color='k', linestyle=':')

    figure.set_size_inches(8, 6)
    if title:
        figure.suptitle(title)
    plt.tight_layout()
Пример #26
0
    def wrangle(self):
        """Build the 'VRP' series from the raw S&P 500 and VIX frames.

        Adds log-return and variance columns to ``self.row_data['sp500']`` in
        place, fits an ARMA(1, 1) model to the combined variance column 'V',
        turns its in-sample prediction into a forecast on the VIX rows from
        position 78 on, and stores ``Close - forecast`` (restricted to
        1990-06-08 .. 2019-11-05, indexed by date) in ``self.wrangled_data``.
        """
        print("Wrangling data for %s ... " % self.NAME)
        sp500 = self.row_data['sp500']
        vix = self.row_data['vix']

        # Log moves: overnight (o), open-to-high (u), open-to-low (d), open-to-close (c).
        log_open = np.log(sp500['Open'])
        sp500['o'] = log_open - np.log(sp500['Close'].shift(1))
        sp500['u'] = np.log(sp500['High']) - log_open
        sp500['d'] = np.log(sp500['Low']) - log_open
        sp500['c'] = np.log(sp500['Close']) - log_open

        # Rolling window length, assumed to be 20.
        n = 20
        up, down, close = sp500['u'], sp500['d'], sp500['c']
        sp500['rs'] = up * (up - close) + down * (down - close)
        sp500['V_rs'] = sp500['rs'].rolling(window=n).sum() / n

        overnight_dev_sq = (sp500['o'] -
                            sp500['o'].rolling(window=n).mean())**2
        sp500['V_o'] = overnight_dev_sq.rolling(window=n).sum() / (n - 1)
        close_dev_sq = (sp500['c'] - sp500['c'].rolling(window=n).mean())**2
        sp500['V_c'] = close_dev_sq.rolling(window=n).sum() / (n - 1)

        # Weighted combination of the three variance components.
        k = 0.34 / (1.34 + (n + 1) / (n - 1))
        sp500['V'] = sp500['V_o'] + k * sp500['V_c'] + (1 - k) * sp500['V_rs']

        # GARCH-like step: an ARMA(1, 1) model applied to the variance series.
        variance_fit = ARMA(sp500['V'][39:], order=(1, 1)).fit()

        vix = vix.iloc[78:]
        # Scale the predicted variance by 252, take the root, express in percent.
        vix['forecast'] = np.sqrt(variance_fit.predict() * 252) * 100

        vix['VRP'] = vix['Close'] - vix['forecast']
        in_range = (vix['Date'] >= '1990-06-08') & (vix['Date'] <= '2019-11-05')
        vix = vix[in_range]
        vix = vix.set_index('Date')
        vix.index = pd.to_datetime(vix.index)

        self.wrangled_data = vix[['VRP']]
Пример #27
0
    return tmp_data


# Python 2 driver script: difference the log series, inspect stationarity,
# select a model, then forecast and map the forecasts back to the original scale.
diffed_ts = diff_ts(dta_log,d=[1,1])
test_stationarity.testStationarity(diffed_ts)
test_stationarity.draw_acf_pacf(diffed_ts,l=31) 
model = arima_model(diffed_ts)
# NOTE(review): interactive debugger stop left in the script.
pdb.set_trace()
model.get_proper_model()
print 'bic:',model.bic,'p:',model.p,'q:',model.q
print model.properModel.forecast()[0]
# print model.forecast_next_day_value(type='day')

# Second model fitted directly with the p and q found above.
# NOTE(review): ARMA is given a 3-element order (p, 1, q) here — confirm it is
# accepted; a (p, d, q) triple looks like an ARIMA order.
model2=ARMA(diffed_ts,(model.p,1,model.q)).fit()
model2.summary2()
predict_sunspots = model2.predict('2090','2100',dynamic=True)
# 5-step forecast, differencing undone, then exponentiated (inverse of the log).
a = model2.forecast(5)[0]
a_ts = predict_diff_recover(a,d=[1,1])
log_a = np.exp(a_ts)


print log_a
# NOTE(review): second interactive debugger stop.
pdb.set_trace()



# Refit with a fixed configuration and recover the in-sample prediction
# through the same inverse steps.
model.certain_model(6,0)

predict_ts = model.properModel.predict()
diff_recover_ts = predict_diff_recover(predict_ts,d=[1,1])
log_recover = np.exp(diff_recover_ts)
Пример #28
0
# Script: fit an ARMA model of order (0, 1) (i.e. a pure MA(1) model) to the
# 'LocalTransmission' column of the Egypt spreadsheet and plot predictions
# for 2020-03-01 .. 2020-05-01.
plt.style.use('fivethirtyeight')

# Load the sheet, keep the two needed columns, index by date, drop missing rows.
df = pd.read_excel("../00Daily/Egypt.xlsx", squeeze=True, parse_dates=True)
df = df[["Date", "LocalTransmission"]]
df.set_index("Date", inplace=True)
df.dropna(inplace=True)
##df['Date'] = pd.to_datetime(df['Date'])
# Integer copy of the column; not used further in this script.
LocalTransmission = df['LocalTransmission'].astype('int32')
#print (df.head())
print(df.index)

# The model is fitted on the whole (single-column) frame.
result = ARMA(df, order=(0, 1)).fit(disp=False)
print(result.summary())
#print(result.params)
predictions = result.predict(start="2020-03-01", end="2020-05-01")
#accuracy = result.score()
print(predictions)
##accuracy = result.score()
#print (accuracy)

# Same date range again, this time drawn by the results object itself.
result.plot_predict(start="2020-03-01", end="2020-05-01")
plt.suptitle('Prediction for postive cases in Egypt \n Algorithm used: MA',
             fontsize=12)

plt.show()

##def mean_forecast_error(y, yhat):
##    return y.sub(yhat).mean()

Пример #29
0
result_D1 = model_D1.fit()'''

'''plt.subplot(312)
plt.plot(D2,'red')
plt.plot(result_D2.fittedvalues,'blue')
plt.title('D2')
plt.subplot(313)
plt.plot(D1,'red')
plt.plot(result_D1.fittedvalues,'blue')
plt.title('D1')
plt.show()
#分解所有的序列
A2_all,D2_all,D1_all = pywt.wavedec(np.array(data['weiyi']),'db4',mode='sym',level=2)
detal = [len(A2_all)-len(A2),len(D2_all)-len(D2),len(D1_all)-len(D1)]
print(detal)'''
# Predict from index 0 to 10 steps past the end of data['value'] using the
# fitted parameters, then plot the data (red), the fitted values (blue) and
# the prediction (green) in one figure.
pqxwy1 = model_qxwy1.predict(params=result_qxwy1.params,start=0,end=len(data['value'])+10)
plt.figure()
plt.plot(data['value'],'red')
plt.plot(result_qxwy1.fittedvalues,'blue')
plt.plot(pqxwy1,'green')
plt.title('qxwy')
plt.show()
'''pD2 = model_D2.predict(params=result_D2.params,start=0,end=len(D2)+detal[1])
pD1 = model_D1.predict(params=result_D1.params,start=0,end=len(D1)+detal[1])'''

'''for i in range(1,len(coeff)):
    coeff[i]=pywt.threshold(coeff[i],value=na[i-1],mode='hard')

#重构
rsc_data = pywt.waverec(coeff,'db4',mode='sym')
plt.figure
Пример #30
0
plt.title('Partial Autocorrelation Function (p=1)')
plt.tight_layout()

'''
In this plot, the two dotted lines on either side of 0 are the confidence intervals.
These can be used to determine the p and q values as:

- p: The lag value where the PACF chart crosses the upper confidence interval for the first time, in this case p=1.

- q: The lag value where the ACF chart crosses the upper confidence interval for the first time, in this case q=1.
'''

'''
### Fit ARMA model with statsmodels

1. Define the model by calling `ARMA()` and passing in the p and q parameters.

2. The model is prepared on the training data by calling the `fit()` function.

3. Predictions can be made by calling the `predict()` function and specifying the index of the time or times to be predicted.
'''

from statsmodels.tsa.arima_model import ARMA


# Fit an ARMA(1, 1) model to x; ``model`` holds the fitted results object.
model = ARMA(x, order=(1,1)).fit() # fit model

print(model.summary())
# Draw the series and, in red, the in-sample predictions.
plt.plot(x)
plt.plot(model.predict(), color='red')
# The title shows the residual sum of squares between fitted values and x.
plt.title('RSS: %.4f'% sum((model.fittedvalues-x)**2))
Пример #31
0
# Remaining panels of the per-band fit figure: coefficients in blue, fitted values in red.
plt.plot(d2, 'blue')
plt.plot(result_d2.fittedvalues, 'red')
plt.title('model_d2')
plt.subplot(3, 1, 3)
plt.plot(d1, 'blue')
plt.plot(result_d1.fittedvalues, 'red')
plt.title('model_d1')
plt.show()

# Decompose the complete load series; the difference in band lengths is the
# number of coefficients each band model has to predict beyond its sample.
a2_all, d2_all, d1_all = pywt.wavedec(list_hourly_load,
                                      'db4',
                                      mode='sym',
                                      level=2)
delta = [len(a2_all) - len(a2), len(d2_all) - len(d2), len(d1_all) - len(d1)]
print(delta)
pa2 = model_a2.predict(params=result_a2.params,
                       start=1,
                       end=len(a2) + delta[0])
pd2 = model_d2.predict(params=result_d2.params,
                       start=1,
                       end=len(d2) + delta[1])
pd1 = model_d1.predict(params=result_d1.params,
                       start=1,
                       end=len(d1) + delta[2])

# Rebuild the load series from the predicted coefficients.
predict_values = pywt.waverec([pa2, pd2, pd1], 'db4')
print(np.shape(predict_values))

# Compare observation and prediction over positions 20710 .. 20757.
plt.plot(list_hourly_load[20710:20758], label="$Observed$", c='green')
# The closing '$' was missing, so this label was not formatted like "$Observed$".
plt.plot(predict_values[20710:20758], label="$Predicted$", c='red')
plt.xlabel('Hour')
plt.ylabel('Electricity load, kW')
# Without a legend the two labels above are never displayed.
plt.legend()
plt.show()
# mape = statistics.mape([y_test_true[i]*1000 for i in range(0,len(y_test_true))],(predicted_values)*1000
Пример #32
0
	print 'Results of Dickey-Fuller Test:'
	dftest = adfuller(timeseries, autolag='AIC')
	dfoutput = pd.Series(dftest[0:4], index=['Test Statistic','p-value','#Lags Used','Number of Observations Used'])
	for key,value in dftest[4].items():
		dfoutput['Critical Value (%s)'%key] = value
	print dfoutput


# Model the log of the series; predictions are exponentiated again below.
ts_log = np.log(ts)
# ts_log_diff = ts_log - ts_log.shift()
# ts_log_diff.dropna(inplace=True)
# test_stationarity(ts_log_diff)

from statsmodels.tsa.arima_model import ARIMA, ARMAResults, ARMA

arma_mod30 = ARMA(ts_log , (2,1)).fit()
# Dynamic prediction over the requested date range, mapped back from log scale.
predict_sunspots = arma_mod30.predict('26-09-2014 00:00', '26-04-2015 23:00', dynamic=True)
p_exp = np.exp(predict_sunspots)
# Single-argument print in parentheses behaves the same under Python 2 and 3.
print(len(p_exp))

# The context manager closes (and flushes) output.csv even if a row fails;
# the file handle used to be left open.
with open("output.csv", "w") as f:
    f.write("Datetime,Count\n")
    # Row i-1 of datetime_col is paired with prediction i.
    for i in range(1, len(p_exp)):
        f.write(datetime_col[i-1] + "," + str(int(round(p_exp[i]))))
        f.write("\n")
    # Final row is written explicitly from position 5111.
    f.write("26-04-2015 23:00," + str(int(round(p_exp[5111]))))
    f.write("\n")