示例#1
0
def getLikelihood(endog,exog, order = None,n_forecasted_data=1):
    """Fit an ARIMA model to ``endog`` and return its point forecast.

    Parameters:
        endog: data handed to ``ARIMA`` for fitting.
        exog: only forwarded to ``objfunc`` during the automatic order
            search; the final ``ARIMA`` fit does not receive it.
        order: ARIMA order. When ``None`` it is chosen by a brute-force grid
            search over ``objfunc``; ``[1, 1, 3]`` is used if the search fails.
        n_forecasted_data: number of steps passed to ``model.forecast``.

    Returns:
        The first element of the value returned by ``model.forecast``.
    """
    # Automatically determine values of orders
    if order is None:
        from scipy.optimize import brute
        grid = (slice(1, 3, 1), slice(1, 3, 1), slice(0, 3, 1))

        print("############################################")
        print(endog)
        print("############################################")

        try:
            order = brute(objfunc, grid, args=(exog, endog), finish=None)
            order = order.astype(int)
        except Exception:
            # Best-effort fallback when the grid search fails. Catching
            # Exception (not a bare except) lets Ctrl-C / SystemExit through.
            order = [1, 1, 3]

    print("*********************************************")
    print("Choose order of  %s" % (order,))
    print("*********************************************")

    # Model fits given data (endog) with the selected order
    model = ARIMA(endog, order).fit(full_output=False, disp=False)

    # 1st element of array x is the forecasted data.
    x = model.forecast(n_forecasted_data)
    return x[0]
# Fit each candidate model on history1 / history2 and forecast from it.
# Holt-Winters exponential smoothing with additive trend and additive
# seasonality of period 7.
model1_x = ExponentialSmoothing(history1,
                                seasonal_periods=7,
                                seasonal='add',
                                trend='add').fit()
y1_x = model1_x.forecast(steps=7)  # forecast 7 steps into the future

model1_z = ExponentialSmoothing(history2,
                                seasonal_periods=7,
                                seasonal='add',
                                trend='add').fit()
y1_z = model1_z.forecast(steps=7)  # forecast 7 steps into the future

# ARIMA with a fixed, per-series order; disp=0 silences the optimiser output.
model2_x = ARIMA(history1, order=(0, 1, 1)).fit(disp=0)
y2_x = model2_x.forecast(steps=7)

model2_z = ARIMA(history2, order=(2, 0, 0)).fit(disp=0)
y2_z = model2_z.forecast(steps=7)

# Seasonal ARIMA with a seasonal period of 12.
# NOTE(review): the exponential-smoothing models above use a period of 7 and
# a 7-step horizon, while this one uses period 12 and 1 step - confirm intended.
model3_x = sm.tsa.statespace.SARIMAX(history1,
                                     order=(1, 1, 1),
                                     seasonal_order=(1, 1, 0, 12),
                                     enforce_stationarity=False,
                                     enforce_invertibility=False).fit()
y3_x = model3_x.forecast(steps=1)

model3_z = sm.tsa.statespace.SARIMAX(history2,
                                     order=(1, 1, 1),
                                     seasonal_order=(1, 1, 0, 12),
                                     enforce_stationarity=False,
示例#3
0
文件: 总代码.py 项目: xiaocnj/-
qmax = int(len(D_O3) / 10)  # orders are usually kept below length/10
e_matrix = []  # AIC score matrix: rows indexed by p, columns by q
for p in range(pmax + 1):
    tmp = []
    for q in range(qmax + 1):
        # Some (p, q) combinations fail to fit; record them as missing.
        # Exception (not a bare except) keeps Ctrl-C / SystemExit working.
        try:
            tmp.append(ARIMA(O3last2month, (p, 1, q)).fit().aic)
        except Exception:
            tmp.append(None)
    e_matrix.append(tmp)
e_matrix = pd.DataFrame(e_matrix)  # the minimum is looked up below
# stack() flattens the matrix to a (p, q)-indexed Series; idxmin() then
# returns the index pair of the smallest AIC.
p, q = e_matrix.stack().idxmin()
print('AIC最小的p值和q值为:%s、%s' % (p, q))
model = ARIMA(O3last2month, (p, 1, q)).fit()
model.summary2()  # model report (the return value is discarded here)
# 5-step forecast: returns the forecast, standard errors and confidence intervals
print(model.forecast(5))
preO3 = model.forecast(1)[0]
# PM2.5: diagnostics and setup for the same order search as above
from statsmodels.graphics.tsaplots import plot_acf
plot_acf(data10.loc[:, 'PM2.5'])  # autocorrelation plot
from statsmodels.stats.diagnostic import acorr_ljungbox
# Ljung-Box white-noise test.
# NOTE(review): this line reads `data1` while every other line reads
# `data10` - confirm which frame is intended.
print('白噪声-检验结果:', acorr_ljungbox(data1.loc[:, 'PM2.5'], lags=1))
from statsmodels.tsa.stattools import adfuller as ADF
print('ADF-检验结果:', ADF(data10.loc[:, 'PM2.5']))  # ADF stationarity test

# Last 60 rows of the column at position 2
PM25last2month = data10.iloc[-60:, 2]
from statsmodels.tsa.arima_model import ARIMA
PM25last2month = PM25last2month.astype(float)
pmax = int(len(PM25last2month) / 10)  # orders are usually kept below length/10
qmax = int(len(PM25last2month) / 10)  # orders are usually kept below length/10
e_matrix = []  # score matrix
示例#4
0
print('差分序列的ADF检验结果为:', ADF(D_data['销量差分']))  # stationarity (ADF) test

# White-noise test
from statsmodels.stats.diagnostic import acorr_ljungbox

print('差分序列的白噪声检验结果为:', acorr_ljungbox(D_data, lags=1))  # statistic and p-value

from statsmodels.tsa.arima_model import ARIMA

# Order selection
data['销量'] = data['销量'].astype(float)
pmax = int(len(D_data) / 10)  # orders are usually kept below length/10
qmax = int(len(D_data) / 10)  # orders are usually kept below length/10
bic_matrix = []  # BIC matrix: rows indexed by p, columns by q
for p in range(pmax + 1):
    tmp = []
    for q in range(qmax + 1):
        # Some (p, q) combinations fail to fit; record them as missing.
        # Exception (not a bare except) keeps Ctrl-C / SystemExit working.
        try:
            tmp.append(ARIMA(data, (p, 1, q)).fit().bic)
        except Exception:
            tmp.append(None)
    bic_matrix.append(tmp)

bic_matrix = pd.DataFrame(bic_matrix)  # the minimum is looked up below

# stack() flattens the matrix; idxmin() gives the (p, q) of the smallest BIC.
p, q = bic_matrix.stack().idxmin()
print('BIC最小的p值和q值为:%s、%s' % (p, q))
model = ARIMA(data, (p, 1, q)).fit()  # fit ARIMA(p, 1, q) with the selected orders
print('模型报告为:\n', model.summary2())
print('预测未来5天,其预测结果、标准误差、置信区间如下:\n', model.forecast(5))
示例#5
0
def _save_report_figure(fig, report_name):
    """Save ``fig`` as ``<today>_00-00-00_<report_name>`` in both output dirs."""
    stamped_name = str(datetime.date.today()) + '_00-00-00_' + report_name
    for directory in ('coronavirus_reports/', 'static/'):
        fig.savefig(directory + stamped_name)


def get_moving_average_growth_rate_and_prediction(input_filename,
                                                  state_name='Karnataka'):
    """Render and save three COVID-19 report figures from a CSV file.

    The figures are written to both ``coronavirus_reports/`` and ``static/``:

    1. daily confirmed cases with a 5-day moving average, one subplot per
       state for the (up to) 15 states with the most confirmed cases;
    2. the growth rate for the same states;
    3. a 30-step ARIMA(5, 1, 0) forecast of confirmed cases for ``state_name``
       plotted next to the actual values.

    Parameters:
        input_filename: path of the CSV to read. The code reads the columns
            'Date', 'State/UnionTerritory', 'Confirmed', 'Deaths' and 'Cured'.
        state_name: value of 'State/UnionTerritory' to forecast.

    Returns:
        None. The results are the saved image files.
    """
    top_state_count = 15
    forecast_days = 30

    matplotlib.use('Agg')
    india_covid_19 = pd.read_csv(input_filename)
    india_covid_19['Date'] = pd.to_datetime(india_covid_19['Date'],
                                            dayfirst=True)
    latest = india_covid_19[india_covid_19['Date'] > '30-01-20']
    # A list selects several columns; the tuple form was removed from pandas.
    state_cases = latest.groupby('State/UnionTerritory')[
        ['Confirmed', 'Deaths', 'Cured']].max().reset_index()
    state_cases = state_cases.sort_values('Confirmed',
                                          ascending=False).fillna(0)
    states = list(state_cases['State/UnionTerritory'][0:top_state_count])

    # Day-over-day increase of confirmed cases and the matching dates.
    states_confirmed = {}
    states_dates = {}
    for state in states:
        df = latest[latest['State/UnionTerritory'] == state].reset_index()
        confirmed = df['Confirmed']
        states_confirmed[state] = [
            confirmed[i] - confirmed[i - 1] for i in range(1, len(df))
        ]
        # The first date has no previous day to diff against.
        states_dates[state] = list(df['Date'])[1:]

    # First output: moving average per state.
    fig = plt.figure(figsize=(25, 17))
    plt.suptitle('5-Day Moving Average of Confirmed Cases in Top 15 States',
                 fontsize=20,
                 y=1.0)
    # enumerate over the states themselves so that all of them (not just the
    # first 14) get a subplot and a shorter list cannot raise IndexError.
    for position, state in enumerate(states, start=1):
        ax = fig.add_subplot(5, 3, position)
        ax.xaxis.set_major_formatter(mdates.DateFormatter('%d-%b'))
        ax.bar(states_dates[state],
               states_confirmed[state],
               label='Day wise Confirmed Cases ')
        moving_aves = calc_movingaverage(states_confirmed[state], 5)
        # NOTE(review): the [:-2] trim presumably matches the length returned
        # by calc_movingaverage - confirm against that helper.
        ax.plot(states_dates[state][:-2],
                moving_aves,
                color='red',
                label='Moving Average',
                linewidth=3)
        ax.set_title(state, fontsize=20)
        handles, labels = ax.get_legend_handles_labels()
        fig.legend(handles, labels, loc='upper left')
    plt.tight_layout(pad=3.0)
    _save_report_figure(fig, 'coronavirus-MovingAverageGraph.png')
    plt.close(fig)  # release the figure once it is on disk

    # Second output: growth rate per state.
    fig = plt.figure(figsize=(25, 17))
    plt.suptitle('Growth Rate in Top 15 States', fontsize=20, y=1.0)
    for position, state in enumerate(states, start=1):
        ax = fig.add_subplot(5, 3, position)
        ax.xaxis.set_major_formatter(mdates.DateFormatter('%d-%b'))
        growth_rate = calc_growthRate(states_confirmed[state])
        # NOTE(review): dates are offset by 21 and rates by 20; presumably
        # calc_growthRate returns one element fewer - confirm.
        ax.plot_date(states_dates[state][21:],
                     growth_rate[20:],
                     color='#9370db',
                     label='Growth Rate',
                     linewidth=3,
                     linestyle='-')
        ax.set_title(state, fontsize=20)
        handles, labels = ax.get_legend_handles_labels()
        fig.legend(handles, labels, loc='upper left')
    plt.tight_layout(pad=3.0)
    _save_report_figure(fig, 'coronavirus-GrowthRateGraph.png')
    plt.close(fig)

    # Third output: ARIMA forecast for the requested state.
    # Columns at positions 1 and 8 are used below as 'Date' and 'Confirmed'.
    data = india_covid_19[india_covid_19['State/UnionTerritory'] ==
                          state_name].iloc[:, [1, 8]]

    arima = ARIMA(data['Confirmed'], order=(5, 1, 0))
    arima = arima.fit(trend='c', full_output=True, disp=True)
    forecast = arima.forecast(steps=forecast_days)
    pred = list(forecast[0])

    last_date = data['Date'].max()
    prediction_dates = [
        last_date + datetime.timedelta(days=offset)
        for offset in range(1, forecast_days + 1)
    ]

    fig = plt.figure(figsize=(15, 10))
    plt.xlabel("Dates", fontsize=20)
    plt.ylabel('Total cases', fontsize=20)
    # The title now reports the horizon that is actually forecast.
    plt.title("Predicted Values for the next %d Days for " % forecast_days +
              state_name,
              fontsize=20)

    plt.plot_date(y=pred,
                  x=prediction_dates,
                  linestyle='dashed',
                  color='#ff9999',
                  label='Predicted')
    plt.plot_date(y=data['Confirmed'],
                  x=data['Date'],
                  linestyle='-',
                  color='blue',
                  label='Actual')
    plt.legend()

    _save_report_figure(fig,
                        'coronavirus_Prediction_' + state_name + '.png')
    plt.close(fig)
    def make_forecast(self, data_df):
        """Fit ARIMA on ``data_df`` and return the point forecasts as a list.

        The order comes from ``self.best_model_order`` and the number of
        forecast steps from ``self.steps``.
        """
        fitted = ARIMA(data_df, order=self.best_model_order).fit(disp=False)
        # forecast() returns several items; the first holds the point forecasts.
        point_forecasts = fitted.forecast(steps=self.steps)[0]
        return point_forecasts.tolist()
示例#7
0
# White-noise test
from statsmodels.stats.diagnostic import acorr_ljungbox
# Returns the statistic and the p-value
print(u"差分序列的白噪声检验结果为:{}".format(acorr_ljungbox(D_data, lags=1)))

from statsmodels.tsa.arima_model import ARIMA
# Order selection
data[u'销量'] = data[u'销量'].astype(float)  # the series is cast to float before fitting
pmax = int(len(D_data) / 10)  # orders are usually kept below a tenth of the length
qmax = int(len(D_data) / 10)
bic_mat = []
for p in range(pmax + 1):
    tmp = []
    for q in range(qmax + 1):
        # Fit ARIMA(p, 1, q) on the original series (d=1 applies the first
        # difference). Failed fits are recorded as missing; Exception (not a
        # bare except) keeps Ctrl-C / SystemExit working.
        try:
            tmp.append(ARIMA(data, (p, 1, q)).fit().bic)
        except Exception:
            tmp.append(None)
    bic_mat.append(tmp)

bic_mat = pd.DataFrame(bic_mat)
print(bic_mat)
# stack() flattens the matrix; idxmin() gives the (p, q) of the smallest BIC.
p, q = bic_mat.stack().idxmin()
print(u"bic最小的p值和q值为:{},{}".format(p, q))
# Fit ARIMA(p, 1, q) with the selected orders
model = ARIMA(data, (p, 1, q)).fit()
print(model.summary2())
# 5-step forecast: returns the forecast, standard errors and confidence intervals
print(model.forecast(5))
# Partial autocorrelation of the fare series for the first 30 lags.
plot_pacf(data['AvgNetFare'], lags=30)

plt.show()

# Bare expression: only displays a value in an interactive session.
len(data) - 30

# Positional split at row 406: everything before is training data, the rest is test.
# NOTE(review): 406 presumably equals len(data) - 30 from the line above - confirm.
xTrain, xTest = data['AvgNetFare'][:406], data['AvgNetFare'][406:]

#len(xTest)
xTest

# Fit ARIMA(10, 2, 1) on the training part.
arima = ARIMA(xTrain, order=(10, 2, 1))
arima = arima.fit()
arima.summary()

# Forecast as many steps as there are test observations.
pred = arima.forecast(steps=len(xTest))

# pred[0] holds the point forecasts; report MSE and RMSE against the test data.
print(mean_squared_error(xTest, pred[0]))
print(np.sqrt(mean_squared_error(xTest, pred[0])))

#pred

# Plot the model's prediction over the given date range and resize the result.
ax = arima.plot_predict(start='2019-05-12', end='2019-06-10')
ax.set_figheight(9)
ax.set_figwidth(19)

import itertools
"""Auto Arima"""

auto = auto_arima(xTrain,
                  start_p=0,
示例#9
0
# @Author  : Aries
# @Site    :
# @File    : arima_model_check.py
# @Software: PyCharm
#模型检验
import pandas as pd

# Parameter initialisation
discfile = u'拓展思考样本数据.xls'
lagnum = 12  # number of residual lags to test
arima_order = (1, 1, 4)  # single source for the fitted order and the messages

data = pd.read_excel(discfile, index_col=u'日期')
xdata = data[u'日志类告警']
print(xdata)

from statsmodels.tsa.arima_model import ARIMA

arima = ARIMA(xdata.astype(float), arima_order).fit()  # build and fit the model
xdata_pred = arima.predict(typ='levels')  # in-sample prediction on the original scale
print(xdata_pred)
print(arima.forecast(2))
pred_error = (xdata_pred - xdata).dropna()  # residuals

from statsmodels.stats.diagnostic import acorr_ljungbox  # white-noise test

# NOTE(review): unpacking two values relies on acorr_ljungbox returning a
# (statistics, p-values) pair, as older statsmodels releases do - confirm.
lb, p = acorr_ljungbox(pred_error, lags=lagnum)
h = (p < 0.05).sum()  # a p-value below 0.05 is treated as "not white noise"
# The message now names the order that was actually fitted instead of a
# hard-coded ARIMA(0,1,1).
model_label = u'模型ARIMA(%d,%d,%d)' % arima_order
if h > 0:
    print(model_label + u'不符合白噪声检验')
else:
    print(model_label + u'符合白噪声检验')
data['销量'] = data['销量'].astype(float)

p_max = int(len(D_data) / 10)  # orders are usually kept below len/10
q_max = int(len(D_data) / 10)  # orders are usually kept below len/10
bic_matrix = []  # BIC matrix: rows indexed by p, columns by q

for p in range(p_max + 1):
    temp = []
    for q in range(q_max + 1):
        # Failed fits are recorded as missing. Exception (not a bare except)
        # keeps Ctrl-C / SystemExit working during the search.
        try:
            temp.append(ARIMA(data, (p, 1, q)).fit().bic)
        except Exception:
            temp.append(None)
    bic_matrix.append(temp)

bic_df = pd.DataFrame(bic_matrix)

# stack() flattens the matrix; idxmin() gives the (p, q) of the smallest BIC.
p, q = bic_df.stack().idxmin()
print("BIC中p和q分别为: {p}、{q}".format(p=p, q=q))

model = ARIMA(data, (p, 1, q)).fit()  # fit the selected model

print('输出模型报告:', '\n', model.summary2())
# forecast, standard errors and confidence intervals for 5 steps
print('输出预测5的结果:', '\n', model.forecast(5))
示例#11
0
for p in range(pmax + 1):
    tmp = []
    for q in range(qmax + 1):
        # Failed fits are recorded as missing. Exception (not a bare except)
        # keeps Ctrl-C / SystemExit working during the search.
        try:
            tmp.append(ARIMA(data, (p, 1, q)).fit().bic)
        except Exception:
            tmp.append(None)
    bic_matrix.append(tmp)


bic_matrix = pd.DataFrame(bic_matrix)  # the minimum is looked up below
print(bic_matrix)

# stack() flattens the matrix; idxmin() gives the (p, q) of the smallest BIC.
p, q = bic_matrix.stack().idxmin()
# The backslash is escaped explicitly: a bare "\ " is an invalid escape
# sequence (a warning on recent Python versions); the output is unchanged.
print('bic最小的p和最小的q为: %s \\ %s' % (p, q))
model = ARIMA(data, (p, 1, q)).fit()  # fit ARIMA(p, 1, q) with the selected orders
result = model.summary2()
print(result)

# 5-step forecast: returns the forecast, standard errors and confidence intervals
test = model.forecast(5)
print(test)









示例#12
0
def time_series(datas):
    """Run the ARIMA pipeline for one request and build the response dict.

    Parameters:
        datas: request payload. The code reads ``datas['callbackFlag']`` and
            ``datas['data']['selected_thisTime']['p'/'q']`` and hands the
            whole payload to ``db_data``.

    Returns:
        dict with the forecast list under "display_data", the model score and
        assessment under "model_assess", the "if_callback" flag and the p/q
        values under "args".
    """
    frame, callback_flag, db_map = db_data(datas)
    file_data, data, train_data, pred_data, step_month, date_next_list = parse_datas(
        frame, db_map)
    k = stationarityTest(data)  # differencing order
    if_black = whitenoiseTest(data)  # 1 means the series is not white noise
    # NOTE(review): this first pass is repeated below when k <= 5 and the
    # series is not white noise. It is kept because its p, q, R2_score and
    # assess still feed the response on the paths that skip the refit.
    p, q = findOptimalpq(train_data, k)
    model_if_white, pred = arimaModelCheck(train_data, p, k, q)
    result, R2_score, assess = calErrors(pred_data, pred)
    # The flag from the payload overrides the one returned by db_data.
    callback_flag = datas['callbackFlag']
    print('callback_flag', callback_flag)
    # Parameters supplied by the front end
    input_p = datas['data']['selected_thisTime']['p']
    input_q = datas['data']['selected_thisTime']['q']

    # Defaults so the response can always be built. Previously these names
    # were unbound when k > 5 or the series was white noise, and building the
    # response raised.
    # NOTE(review): if_callback = 0 for those paths is an assumption - confirm.
    predict_list = []
    if_callback = 0

    print('k', k)
    if k <= 5:  # at most 5 rounds of differencing are accepted
        if if_black == 1:  # not white noise: there is information to extract
            if callback_flag == 0:
                p, q = findOptimalpq(train_data, k)  # search for the best p, q
            else:
                p, q = input_p, input_q  # callback: use the front-end values
            print('p', p)
            print('q', q)
            model_if_white, pred = arimaModelCheck(train_data, p, k, q)
            if model_if_white == 1:  # residuals are white noise: model accepted
                result, R2_score, assess = calErrors(pred_data, pred)
                print(result)

                # Refit on all samples for the out-of-sample forecast
                xdata = file_data['Y']

                arima = ARIMA(xdata, (p, k, q)).fit()
                # First element of forecast() holds the point forecasts.
                predict = arima.forecast(predictnum)[0]
                print('predict', predict)

                predict_list = [{
                    "date": date_next_list[i],
                    "Y": value
                } for i, value in enumerate(predict)]
                print("预测下5个月份/季度的数据", predict_list)
                if_callback = 0

            else:  # residuals are not white noise: p, q must be re-tuned
                predict_list = []
                R2_score = ""
                assess = ""
                if_callback = 1  # force a callback; the values stay empty
    else:
        print("注意:该数据不适合建立时间序列模型!")

    return_data = {
        "pass_data": {},
        "display_data": predict_list,
        "display_data_type": "",
        "model_assess": {
            "模型评分": R2_score,
            "模型评价": assess
        },
        "if_display": 0,
        "display_info": ["display_data", "model_assess"],
        "if_callback": if_callback,
        "args": {
            "list": ["p", "q"],
            "selected_thisTime": {
                "p": p,
                "q": q
            },
            "selected_lastTime": {},
            "args_display_type":
            "select",
            "args_info":
            "本次操作说明:需要调整参数p和q的值,步长为1, 取值范围[0,5]。其中,p是自回归(AR)的项数,用来获取自变量;q是移动平均(MA)的项数,为了使其光滑"
        },
        "return_data_instructions":
        "if_callback为0时,可继续下一步也可回调,请将该节点的结果data保存,并传给下一节点使用;if_callback为1时,表示强制回调。",
        "others": ""
    }
    print(return_data)
    return return_data
示例#13
0
# we therefore use training length of 53 to train our model
dtw_forecast = dtw_pred(test.cumulative_cases, 38, 30)

simple_plot(test.cumulative_cases[-30:])
simple_plot(dtw_forecast)
res = test.cumulative_cases[-30:] - dtw_forecast
res = sum(res**2)  # residual of 1.26E9, not bad! but not the best obviously
simple_plot(test.cumulative_cases)

##################### final model summary
# arima: fitted on the log-transformed Ontario series from position 70 onwards
ON_cases = covid[covid.province == 'Ontario'].cumulative_cases
arima_train = fast_log(ON_cases)[70:]
pq_search(arima_train, 3, 2, 2, 3, 0.05)  # min AIC is at 321
arima_model = ARIMA(arima_train, (0, 2, 3)).fit(disp=False)
arima_forecast, se, conf = arima_model.forecast(30,
                                                alpha=0.05)  # 30 days forecast
# Undo the log transform on the forecast and on both confidence bounds.
arima_forecast = np.exp(arima_forecast)
arima_forecast = pd.Series(arima_forecast)
lower_forecast = np.exp(pd.Series(conf[:, 0]))
upper_forecast = np.exp(pd.Series(conf[:, 1]))
# exponential smoothing: damped multiplicative trend, no seasonality
# NOTE(review): newer statsmodels releases name this option `damped_trend`.
expsm_model = ExponentialSmoothing(ON_cases,
                                   trend='mul',
                                   seasonal=None,
                                   damped=True).fit()
expsm_forecast = expsm_model.forecast(30)

# dtw: pad the series with 30 zeros before predicting.
# pd.concat replaces Series.append, which was removed from pandas; the
# resulting series is the same.
dtw_train = pd.concat([ON_cases, pd.Series([0] * 30)])
dtw_forecast = dtw_pred(dtw_train, 38, 30)
dtw_forecast = pd.Series(dtw_forecast)
示例#14
0
from statsmodels.graphics.tsaplots import plot_pacf
plot_pacf(D_data).show()  # partial autocorrelation plot
ADF(D_data[u'销量差分'])  # stationarity (ADF) test

# White-noise test
from statsmodels.stats.diagnostic import acorr_ljungbox
acorr_ljungbox(D_data, lags=1)  # returns the statistic and the p-value

from statsmodels.tsa.arima_model import ARIMA

# Order selection
pmax = int(len(D_data) / 10)  # orders are usually kept below length/10
qmax = int(len(D_data) / 10)  # orders are usually kept below length/10
bic_matrix = []  # BIC matrix: rows indexed by p, columns by q
for p in range(pmax + 1):
    tmp = []
    for q in range(qmax + 1):
        # Failed fits are recorded as missing. Exception (not a bare except)
        # keeps Ctrl-C / SystemExit working during the search.
        try:
            tmp.append(ARIMA(data, (p, 1, q)).fit().bic)
        except Exception:
            tmp.append(None)
    bic_matrix.append(tmp)

bic_matrix = pd.DataFrame(bic_matrix)  # the minimum is looked up below

# stack() flattens the matrix; idxmin() gives the (p, q) of the smallest BIC.
p, q = bic_matrix.stack().idxmin()
print(u'BIC最小的p值和q值为:%s、%s' % (p, q))
# NOTE(review): the order is hard-coded to (0, 1, 1) here rather than using
# the (p, q) selected above - confirm this is intentional.
model = ARIMA(data, (0, 1, 1)).fit()
model.summary()  # model report
model.forecast(5)  # 5-step forecast: forecast, standard errors, confidence intervals
def programmer_6():
    """Run the ARIMA workflow on 'data/arima_data.xls'.

    Steps: autocorrelation plot and ADF test of the original series, first
    differencing with its plots and tests, a BIC grid search over (p, q) and
    finally a fit of ARIMA(p, 1, q) followed by a ``forecastnum``-step
    forecast.

    Notes carried over from the original author:
      * "UserWarning: matplotlib is currently using a non-GUI backend, so
        cannot show the figure" appears when plt.show() is called several
        times; using plt.subplot() was the suggested remedy.
      * "RuntimeWarning: overflow encountered in exp" was attributed to
        insufficient numeric precision.

    forecastnum is the number of steps to forecast; plot_acf / plot_pacf
    draw the autocorrelation / partial autocorrelation plots.
    """
    import warnings

    discfile = 'data/arima_data.xls'
    forecastnum = 5
    data = pd.read_excel(discfile, index_col=u'日期')

    fig = plt.figure(figsize=(8, 6))
    # First autocorrelation plot
    ax1 = plt.subplot(411)
    fig = plot_acf(data, ax=ax1)

    # Stationarity test
    print(u'原始序列的ADF检验结果为:', ADF(data[u'销量']))
    # The returned values are, in order: adf, pvalue, usedlag, nobs,
    # critical values, icbest, regresults, resstore

    # First difference
    D_data = data.diff().dropna()
    D_data.columns = [u'销量差分']
    # Time-series plot
    D_data.plot()
    plt.show()
    # Second autocorrelation plot
    fig = plt.figure(figsize=(8, 6))
    ax2 = plt.subplot(412)
    fig = plot_acf(D_data, ax=ax2)
    # Partial autocorrelation plot
    ax3 = plt.subplot(414)
    fig = plot_pacf(D_data, ax=ax3)
    plt.show()
    fig.clf()

    print(u'差分序列的ADF检验结果为:', ADF(D_data[u'销量差分']))  # stationarity test

    # White-noise test
    print(u'差分序列的白噪声检验结果为:', acorr_ljungbox(D_data, lags=1))  # statistic and p-value
    data[u'销量'] = data[u'销量'].astype(float)
    # Order selection
    pmax = int(len(D_data) / 10)  # orders are usually kept below length/10
    qmax = int(len(D_data) / 10)  # orders are usually kept below length/10
    bic_matrix = []  # BIC matrix: rows indexed by p, columns by q
    data.dropna(inplace=True)

    # Warnings are promoted to errors so that fits which only warn are
    # rejected too. catch_warnings() restores the previous filters afterwards
    # instead of leaving the 'error' filter active for the whole process.
    with warnings.catch_warnings():
        warnings.filterwarnings('error')
        for p in range(pmax + 1):
            tmp = []
            for q in range(qmax + 1):
                # Exception (not a bare except) keeps Ctrl-C working.
                try:
                    tmp.append(ARIMA(data, (p, 1, q)).fit().bic)
                except Exception:
                    tmp.append(None)
            bic_matrix.append(tmp)
    # The minimum is looked up below
    bic_matrix = pd.DataFrame(bic_matrix)
    # stack() flattens the matrix; idxmin() gives the (p, q) of the smallest BIC.
    p, q = bic_matrix.stack().idxmin()
    print(u'BIC最小的p值和q值为:%s、%s' % (p, q))
    model = ARIMA(data, (p, 1, q)).fit()  # fit ARIMA(p, 1, q) with the selected orders
    model.summary2()  # model report
    model.forecast(forecastnum)  # forecast, standard errors, confidence intervals
示例#16
0
文件: ARIMA.py 项目: zpccc/ARIMA
plot_pacf(D_data).show()  # partial autocorrelation plot
print(u'1阶差分序列的ADF检验结果为:', ADF(D_data[u'销量差分']))
from statsmodels.stats.diagnostic import acorr_ljungbox
print(u'差分序列的白噪声检验结果为:', acorr_ljungbox(D_data, lags=1))
from statsmodels.tsa.arima_model import ARIMA
data[u'销量'] = data[u'销量'].astype(float)
# Orders are searched up to a tenth of the differenced series length.
pmax = int(len(D_data) / 10)
qmax = int(len(D_data) / 10)
bic_matrix = []  # BIC matrix: rows indexed by p, columns by q
for p in range(pmax + 1):
    tmp = []
    for q in range(qmax + 1):
        # Failed fits are recorded as missing. Exception (not a bare except)
        # keeps Ctrl-C / SystemExit working during the search.
        try:
            tmp.append(ARIMA(data, (p, 1, q)).fit().bic)
        except Exception:
            tmp.append(None)
    bic_matrix.append(tmp)
bic_matrix = pd.DataFrame(bic_matrix)
print(bic_matrix)
# stack() flattens the matrix; idxmin() gives the (p, q) of the smallest BIC.
p, q = bic_matrix.stack().idxmin()
print(u'bic最小的P值和q值为:%s、%s' % (p, q))
model = ARIMA(data, (p, 1, q)).fit()
model.summary2()
forecast = model.forecast(5)
print(forecast)





# Original author's reading of the result: at a significance level of 0.05
# the correlation coefficients do not differ significantly from zero, i.e.
# the series is white noise.

# Ljung-Box test: with qstat=True, acf() also returns the Q statistics and
# their p-values.
print("===========================Ljung-Box检验========================================")
r_Ljung_Box,q_Ljung_Box,p_Ljung_Box = sm.tsa.acf(resid.values.squeeze(), qstat=True)
# One row per lag 1..40; the lag-0 autocorrelation is dropped with [1:].
# NOTE(review): the hard-coded 40 assumes acf() returns 40 lags - confirm for
# the statsmodels version in use.
data = np.c_[range(1,41), r_Ljung_Box[1:], q_Ljung_Box, p_Ljung_Box]
table = pd.DataFrame(data, columns=['lag', "AC", "Q", "Prob(>Q)"])
print(table.set_index('lag'))
print("===========================Ljung-Box检验========================================")


# 6. Forecasting
# --- forecast with forecast() ---
print("##########使用forecast预测数据###################") 
# forecast() returns a 3-tuple of arrays:
# forecast : array, stderr : array, conf_int : 2-D array
predict_dta = arima.forecast(forecast_size) # forecast N consecutive values
print(predict_dta)
print("##########使用forecast预测数据###################") 

# --- forecast with predict() over a date range ---
print("##########使用plot_predict预测数据###################") 

# `typ` is only passed when the model differences the data (d != 0).
if d == 0:
    predict_dta2 = arima.predict(start = forecast_start_date, end = forecast_end_date,dynamic = False)
else:
    predict_dta2 = arima.predict(start = forecast_start_date, end = forecast_end_date,dynamic = False,typ = forecast_typ)
print(predict_dta2)

xdata_pred2,ax = plt.subplots(figsize = fig_size )
# NOTE(review): `.ix` was removed from pandas 1.0 - this line needs
# .iloc/.loc on current pandas.
ax = data_analysis.ix[1:].plot(ax=ax)
示例#18
0
def gen_ohlcv(interval):
    """
    Build the BTCUSD OHLCV candlestick figure with the predicted close overlaid.

    :params interval: position used to pick the data window; wrapped modulo 750
    :return: a ``go.Figure`` holding the candlestick trace and the prediction line
    """
    # Hack from the original author: wrap the interval around the available
    # data (out-of-sample starts at 1500 and the frame has 2274 rows, so the
    # usable span is roughly 750).
    interval = interval % 750

    print("interva is {}...".format(interval))

    # Load the 100 rows leading up to the interval and add log returns.
    df = get_ohlcv_data(interval - 100, interval)
    df['log_ret'] = np.log(df.Close) - np.log(df.Close.shift(1))

    print("\ndata df loaded, starting prediction...\n")
    # Online training on the last 60 log returns, then a one-step forecast.
    training_window = df.tail(60)["log_ret"]
    fitted = ARIMA(training_window, order=(3, 1, 0), freq='D').fit(disp=0)
    pred = fitted.forecast()[0]

    print("\nprediction ended, writing to output df...")

    # Store the forecast in the shared output frame (a database in real use).
    last_row = df.tail(1)
    next_dt = last_row.index[0] + pd.Timedelta('1 day')
    predicted_log_ret = pred[0]
    predicted_close = (np.exp(pred) * last_row.Close.values)[0]
    df_pred.loc[next_dt] = [predicted_log_ret, predicted_close]
    print("\nnext datetime is {}...".format(next_dt))
    # Position just past the row that was written.
    loc = df_pred.index.get_loc(next_dt) + 1
    print("\nloc is {}...".format(loc))

    # Up to the 30 most recent predictions, for plotting.
    first = max(0, loc - 30)
    last = min(loc, len(df))
    df_pred_plot = df_pred.iloc[slice(first, last)].sort_index()
    print("\n set pred df for plotting...\n", df_pred_plot)

    # Candlestick trace over the last 50 rows.
    recent = df.tail(50)
    trace_ohlc = go.Candlestick(
        x=recent.index,
        open=recent['Open'],
        close=recent['Close'],
        high=recent['High'],
        low=recent['Low'],
        opacity=0.5,
        hoverinfo="skip",
        name="BTCUSD",
    )

    # Line trace for the predicted close.
    trace_line = go.Scatter(x=df_pred_plot.index,
                            y=df_pred_plot.pred_Close,
                            line_color='yellow',
                            mode="lines+markers",
                            name="Predicted Close")

    x_axis = {"showline": False, "showgrid": False, "zeroline": False}
    y_axis = {
        "showgrid": True,
        "showline": True,
        "fixedrange": True,
        "zeroline": True,
        "gridcolor": app_color["graph_line"],
        "title": "Price (USD$)"
    }
    background = app_color["graph_bg"]
    layout = go.Layout(
        plot_bgcolor=background,
        paper_bgcolor=background,
        font={"color": "#fff"},
        height=700,
        xaxis=x_axis,
        yaxis=y_axis,
    )

    return go.Figure(data=[trace_ohlc, trace_line], layout=layout)
示例#19
0
文件: test.py 项目: pidanself/pytest
# ifsuccess records whether this alarm succeeded: 1 = success, 0 = failure
ifsuccess = 0
# sensitivity is the sensitivity of this alarm
sensitivity = 0
length = len(light_data)
middle = length // 2
# end marks the current position in the series
end = middle - 54
# Forecasts collected per position (one list per index of light_data)
history_f = [[] for i in range(length)]
# Forecast errors of the history window
windows_error = []
while end <= (length - 1):
    windows_data = light_data[end - 50:end]
    p, q = ARI(windows_data)
    model = ARIMA(windows_data, (p, diff, q)).fit()
    f5 = model.forecast(5)[0]
    for i in range(5):
        history_f[end + i].append(f5[i])
    if len(history_f[end]) == 5:
        x = np.mean(history_f[end])
        er = x - light_data[end]
        if len(windows_error) < 50:
            windows_error.append(er)
        else:
            if er > np.max(windows_error):
                if time[end] >= anomaly_start and time[end] <= anomaly_end:
                    ifsuccess = 1
                    sensitivity = (time[end] - t0) / t1
                break
            else:
                windows_error.pop(0)
    return (country_cases, country_daily_increase, country_daily_death,
            country_name)


# Collect the case series for one country and wrap each in a two-column frame.
country_cases, country_daily_increase, country_daily_death, country_name = country_visualizations(
    'Sri Lanka')

country_cases_df = pd.DataFrame(country_cases, columns=['Date', 'cases'])
country_daily_increase_df = pd.DataFrame(country_daily_increase,
                                         columns=['Date', 'cases'])
country_daily_death_df = pd.DataFrame(country_daily_death,
                                      columns=['Date', 'cases'])

# Fit ARIMA(5, 1, 0) with a constant on the cumulative cases and forecast
# 30 steps; forecast[0] holds the point forecasts.
arima = ARIMA(country_cases_df['cases'], order=(5, 1, 0))
arima = arima.fit(trend='c', full_output=True, disp=True)
forecast = arima.forecast(steps=30)
pred = list(forecast[0])

# Build the 30 dates that follow the last observed date, one day apart.
start_date = country_cases_df['Date'].iloc[-1]
prediction_dates = []
for i in range(30):
    date = start_date + timedelta(days=1)
    prediction_dates.append(date)
    start_date = date

fig = plt.figure()
#plt.xlabel("Dates",fontsize = 20)
plt.ylabel('Total cases', fontsize=20)
#plt.title("Predicted Total cases for the next 15 Days" , fontsize = 20)

obj, = plt.plot_date(y=pred,
示例#21
0
from statsmodels.tsa.stattools import adfuller as ADF
print(u'ADF:', ADF(data[u'high']))  # stationarity test of the original series
# First difference of the data
D_data = data.diff().dropna()
D_data.columns = [u'result']
D_data.plot()
plt.show()
plot_acf(D_data).show()  # autocorrelation plot
plt.show()
from statsmodels.graphics.tsaplots import plot_pacf
plot_pacf(D_data).show()  # partial autocorrelation plot
#print(u'ADF2:',ADF(D_data[u'result2']))
from statsmodels.stats.diagnostic import acorr_ljungbox
#print(u'result3:',acorr_ljungbox(D_data,lags=1))
from statsmodels.tsa.arima_model import ARIMA
# Orders are searched up to a tenth of the differenced series length.
pmax = int(len(D_data) / 10)
qmax = int(len(D_data) / 10)
bic_matrix = []  # BIC matrix: rows indexed by p, columns by q
# Fixed: the original wrote `range(pmax)+1` (adding an int to a range) and
# used inconsistent indentation, so the loop could not run.
for p in range(pmax + 1):
    tmp = []
    for q in range(qmax + 1):
        # Failed fits are recorded as missing. Exception (not a bare except)
        # keeps Ctrl-C / SystemExit working during the search.
        try:
            tmp.append(ARIMA(data, (p, 1, q)).fit().bic)
        except Exception:
            tmp.append(None)
    bic_matrix.append(tmp)
bic_matrix = pd.DataFrame(bic_matrix)
# stack() flattens the matrix; idxmin() gives the (p, q) of the smallest BIC.
p, q = bic_matrix.stack().idxmin()
model = ARIMA(data, (p, 1, q)).fit()
model.summary2()
model.forecast(1)
示例#22
0
                              start_q=0,
                              max_p=10,
                              max_q=10,
                              m=4,
                              start_P=0,
                              seasonal=True,
                              d=1,
                              D=1,
                              trace=True,
                              error_action="ignore",
                              suppress_warnings=True,
                              stepwise=False)

# Results noted by the original author for this run:
auto_arima_model.summary()  # SARIMAX(1, 1, 1)x(0, 1, 1, 12)
# AIC ==> 1348.728
# BIC ==> 1362.665

# For getting Fitted values for train data set we use
# predict_in_sample() function
auto_arima_model.predict_in_sample()

# For getting predictions for future we use predict() function
pred_test = pd.Series(auto_arima_model.predict(n_periods=12))
# Adding the index values of Test Data set to predictions of Auto Arima
pred_test.index = Test.index
MAPE(pred_test, Test.Sales)  # 12.72

from statsmodels.tsa.arima_model import ARIMA
# Fit ARIMA(1, 1, 0) on the full Sales series.
model = ARIMA(plastic.Sales, order=(1, 1, 0)).fit(transparams=True)
# NOTE(review): despite the name, this holds the first element of forecast(),
# which is used elsewhere in this file as the forecast values, not errors.
forecasterrors = model.forecast(steps=12)[0]  # the next 12 forecast values
示例#23
0
def loop_train(dataset, i):
    """Fit ARIMA(0, 1, 1) on 'Adj Close' and record the one-step forecast.

    Parameters:
        dataset: object indexed with ``dataset['Adj Close']``; the forecast is
            written into that column's series at the computed date.
        i: day offset from 2015-12-12 at which the forecast is stored.

    Returns:
        The first element of ``forecast(1)`` (the array of point forecasts).
    """
    loop_train_model = ARIMA(dataset['Adj Close'], (0, 1, 1)).fit()
    # Forecast once and reuse it for both the stored value and the result.
    one_step = loop_train_model.forecast(1)[0]
    # timedelta arithmetic gives the same date as datetime(2015, 12, 12 + i)
    # while the day stays within December, and keeps working past day 31.
    target_date = datetime.datetime(2015, 12, 12) + datetime.timedelta(
        days=int(i))
    # NOTE(review): this writes through the column series; whether the new
    # row reaches `dataset` itself depends on pandas' chained-assignment
    # behaviour - confirm against the caller.
    dataset['Adj Close'].loc[target_date] = one_step[0]
    return one_step
            #存在部分报错,所以用try来跳过报错。
            try:
                tmp.append(ARIMA(ts_log, (p, 1, q)).fit().bic)
            except:
                tmp.append(None)
        bic_matrix.append(tmp)
    #从中可以找出最小值
    bic_matrix = pd.DataFrame(bic_matrix)
    #先用stack展平,然后用idxmin找出最小值位置。
    p, q = bic_matrix.stack().idxmin()
    #print bic_matrix
    print(u'商店:%s,BIC最小的p值和q值为:%s、%s' % (a + 1, p, q))
    #建立ARIMA(0, 1, 1)模型
    model = ARIMA(ts_log, (p, 1, q)).fit()
    #作为期90天的预测,返回预测结果、标准误差、置信区间。
    aaa = np.exp(model.forecast(90)[0])
    t[a] = aaa

# Stack columns 0..89 of the forecast table on top of each other.
# The frame is built once and concatenated in a single call instead of being
# rebuilt and re-concatenated on each of the 89 loop iterations.
forecast_frame = pd.DataFrame(np.array(t))
t1 = pd.concat([forecast_frame[i] for i in range(90)], axis=0)
t1 = t1.reset_index(drop=True)

# One row per calendar day of the forecast period.
rng = pd.date_range('2017-01-01', '2017-03-31', freq='D')
result = pd.DataFrame()
result['date'] = rng
result = pd.concat([
    result, result, result, result, result, result, result, result, result,
    result, result, result, result, result, result
],
示例#25
0
from db_tools import *
示例#26
0
    # Order selection: grid-search p and q and record each model's BIC.
    pmax = int(len(D_data) / 10)  # rule of thumb: the order rarely exceeds length/10
    qmax = int(len(D_data) / 10)  # rule of thumb: the order rarely exceeds length/10
    bic_matrix = []  # BIC matrix: one row per p, one column per q
    for p in range(pmax + 1):
        tmp = []
        for q in range(qmax + 1):
            try:  # some (p, q) combinations fail to fit; skip them via try
                tmp.append(ARIMA(all_index, (p, 1, q)).fit().bic)
            except:
                tmp.append(None)
        bic_matrix.append(tmp)
    bic_matrix = pd.DataFrame(bic_matrix)  # the minimum can be located from this

    p, q = bic_matrix.stack().idxmin()  # flatten with stack, then idxmin gives the position of the minimum
    print(u'BIC最小的p值和q值为:%s、%s' % (p, q))
    # NOTE(review): the model is fitted with the fixed order (1, 1, 0); the
    # BIC-selected p and q above are only printed, never used. The original
    # comment said "build the ARIMA(0, 1, 1) model", which matches neither.
    model = ARIMA(all_index, (1, 1, 0)).fit()
    model.summary2()  # produce a model report (the return value is discarded)
    # 10-step forecast (the original comment said 5 days). Per the original
    # comment forecast() returns predictions, standard errors and confidence
    # intervals; [0] selects the first. The result is discarded.
    model.forecast(10)[0]
    ax = all_index.plot()
    # NOTE(review): `fig` receives the return value of predict(), not a figure.
    fig = model.predict('2017-10-01', '2018-05-01', dynamic=True)
    plt.show()
    # sql_ygjq = "select NVL(mon1,mon2) as m1,NVL(id1,id2) as id1,nvl(sum1,0) as sum1, \
    #            NVL(mon2,mon1) as m2,NVL(id2,id1)as id2,NVL(sum2,0) as sum2 from \
    #            (select t1.t_month as mon1,t1.aab001 as id1,t1.cnt as sum1, \
    #            t2.t_month as mon2,t2.aab001 as id2,t2.cnt as sum2 \
    #            from (select * from AB01_WITH_CD01_COUNT_RESULT where t_month = '201701')t1 \
    #            FULL join  (select * from AB01_WITH_CD01_COUNT_RESULT where t_month = '201702')t2 \
    #            on t1.aab001=t2.aab001 )"
    # company = pd.read_sql_query(sql_zc, con=db)
plot_pacf(D_data).show()  # partial autocorrelation plot
print(u'差分序列的ADF检验结果为:', ADF(D_data[u'销量差分']))  # stationarity (ADF) test on the differenced series

# White-noise test
from statsmodels.stats.diagnostic import acorr_ljungbox
print(u'差分序列的白噪声检验结果为:', acorr_ljungbox(D_data, lags=1))  # returns the statistic and the p-value

from statsmodels.tsa.arima_model import ARIMA

data[u'销量'] = data[u'销量'].astype(float)
# Order selection: grid-search p and q and record each model's BIC.
pmax = int(len(D_data) / 10)  # rule of thumb: the order rarely exceeds length/10
qmax = int(len(D_data) / 10)  # rule of thumb: the order rarely exceeds length/10
bic_matrix = []  # BIC matrix: one row per p, one column per q
for p in range(pmax + 1):
    tmp = []
    for q in range(qmax + 1):
        try:  # some (p, q) combinations fail to fit; skip them via try
            tmp.append(ARIMA(data, (p, 1, q)).fit().bic)
        except:
            tmp.append(None)
    bic_matrix.append(tmp)

bic_matrix = pd.DataFrame(bic_matrix)  # the minimum can be located from this

p, q = bic_matrix.stack().idxmin()  # flatten with stack, then idxmin gives the position of the minimum
print(u'BIC最小的p值和q值为:%s、%s' % (p, q))
model = ARIMA(data, (p, 1, q)).fit()  # build the ARIMA(p, 1, q) model with the selected p and q
model.summary2()  # produce a model report (the return value is discarded)
model.forecast(5)  # 5-day forecast; returns predictions, standard errors and confidence intervals
    ax2 = fig.add_subplot(gs[1,0])
    plot_acf(series, ax=ax2, title='ACF')
    
    ax3 = fig.add_subplot(gs[1,1])
    sns.kdeplot(series, ax=ax3)
    ax3.set_title('density')
    
    plt.show()


# %%
# Pass the model residuals to the check_residuals helper.
check_residuals(residuals)


# %%
# Forecast 24 steps ahead; the result is unpacked into the point forecasts,
# `se` and `conf`.
arima_forecast, se, conf = arima.forecast(24)

# Re-index the forecast and the two columns of `conf` (used below as the
# lower and upper bounds) with the test set's index.
arima_forecast = pd.Series(arima_forecast, index=airpassengers_test.index)
lower_series = pd.Series(conf[:, 0], index=airpassengers_test.index)
upper_series = pd.Series(conf[:, 1], index=airpassengers_test.index)


# %%
# Plot the training series and the forecast, shading the band between the
# lower and upper series.
plt.plot(airpassengers_season_diff_train, label='train')
plt.plot(arima_forecast, label='forecast')

plt.fill_between(lower_series.index, lower_series, upper_series, color='k', alpha=.15)
plt.legend()


# %%
示例#29
0
def programmer_6():
    """Pick an ARIMA(p, 1, q) order by BIC for the sales series and forecast.

    Reads ``data/arima_data.xls`` indexed by the ``日期`` column, plots the
    autocorrelation of the raw and first-differenced series, prints the ADF
    and Ljung-Box test results, grid-searches p and q for the lowest BIC, and
    finally fits the selected model and forecasts ``forecastnum`` steps.

    Warnings the original author ran into:
      * ``UserWarning: matplotlib is currently using a non-GUI backend, so
        cannot show the figure`` -- attributed to calling ``plt.show()``
        several times; the remedy used here is ``plt.subplot()``.
      * ``RuntimeWarning: overflow encountered in exp`` -- attributed to
        insufficient numeric precision.

    Returns:
        None. The model report and the forecast are computed but discarded.
    """
    import warnings

    discfile = 'data/arima_data.xls'
    forecastnum = 5  # number of days to forecast
    data = pd.read_excel(discfile, index_col=u'日期')

    fig = plt.figure(figsize=(8, 6))
    # First autocorrelation plot (raw series).
    ax1 = plt.subplot(411)
    fig = plot_acf(data, ax=ax1)

    # Stationarity test on the raw series. The returned values are, in order:
    # adf, pvalue, usedlag, nobs, critical values, icbest, regresults, resstore.
    print(u'原始序列的ADF检验结果为:', ADF(data[u'销量']))

    # First difference of the series.
    D_data = data.diff().dropna()
    D_data.columns = [u'销量差分']
    # Time-series plot of the differenced data.
    D_data.plot()
    plt.show()
    # Second autocorrelation plot (differenced series).
    fig = plt.figure(figsize=(8, 6))
    ax2 = plt.subplot(412)
    fig = plot_acf(D_data, ax=ax2)
    # Partial autocorrelation plot.
    ax3 = plt.subplot(414)
    fig = plot_pacf(D_data, ax=ax3)
    plt.show()
    fig.clf()

    # Stationarity test on the differenced series.
    print(u'差分序列的ADF检验结果为:', ADF(D_data[u'销量差分']))

    # White-noise test; returns the statistic and the p-value.
    print(u'差分序列的白噪声检验结果为:', acorr_ljungbox(D_data, lags=1))
    data[u'销量'] = data[u'销量'].astype(float)
    # Order selection; rule of thumb: the order rarely exceeds length/10.
    pmax = int(len(D_data) / 10)
    qmax = int(len(D_data) / 10)
    bic_matrix = []  # BIC matrix: one row per p, one column per q
    data.dropna(inplace=True)

    # Treat warnings as errors during the grid search so that any fit that
    # warns is rejected. The filter is scoped with catch_warnings() so it is
    # restored afterwards instead of permanently changing the process-wide
    # warning configuration.
    with warnings.catch_warnings():
        warnings.filterwarnings('error')
        for p in range(pmax + 1):
            tmp = []
            for q in range(qmax + 1):
                try:
                    tmp.append(ARIMA(data, (p, 1, q)).fit().bic)
                except Exception:
                    # Failed or warning-raising fit: record NaN rather than
                    # None so every column stays float and idxmin() below
                    # does not trip over an object-dtype column.
                    tmp.append(float('nan'))
            bic_matrix.append(tmp)
    # The minimum can be located from this matrix.
    bic_matrix = pd.DataFrame(bic_matrix)
    # Flatten with stack, then idxmin gives the (p, q) position of the minimum.
    p, q = bic_matrix.stack().idxmin()
    print(u'BIC最小的p值和q值为:%s、%s' % (p, q))
    model = ARIMA(data, (p, 1, q)).fit()  # build ARIMA(p, 1, q) with the selected orders
    model.summary2()  # produce a model report (the return value is discarded)
    # forecastnum-day forecast; returns predictions, standard errors and
    # confidence intervals (discarded here).
    model.forecast(forecastnum)
        print('模型ARIMA(%s,1, %s)不符合白噪音检验' % (p, q))
        print('在BIC矩阵中去掉[%s,%s]组合,重新进行计算' % (p, q))
        matrix.iloc[p, q] = np.nan
        arimafail = arima
        continue
    else:
        # print(p,q)
        print('模型ARIMA(%s,%s)符合白噪声检验' % (p, q))
        break
'''

 '''

# Step 5 (labelled "C drive" in the original heading) -- model forecasting
print('模型报告:summary():\n', arima.summary())
# 5-step forecast, unpacked into values, standard errors and confidence interval.
forecast_values, forecasts_standard_error, forecast_confidence_interval = arima.forecast(
    5)

# Put the forecast next to the test values, rename both columns, format every
# cell with two decimals and write the table to Excel.
pre_data = pd.DataFrame(xtest_value)
pre_data.insert(1, 'CWXT_DB:184:D:\\_predict', forecast_values)
pre_data.rename(columns={
    'CWXT_DB:184:D:\\': '实际值',
    'CWXT_DB:184:D:\\_predict': '预测值'
},
                inplace=True)
result_d = pre_data.applymap(lambda x: '%.2f' % x)
result_d.to_excel('../my_data/pedictdata_D_BIC_ARMA.xlsx')

# Step 5 (labelled "D drive" in the original heading) -- model evaluation
# To judge how well the time-series model forecasts, this chapter uses three
# accuracy statistics: mean absolute error, root mean squared error and mean
# absolute percentage error.
result = pd.read_excel('../my_data/pedictdata_D_BIC_ARMA.xlsx',
                       index_col='COLLECTTIME')
示例#31
0
print(u'1-diff series white noise test result: ',
      acorr_ljungbox(D_data, lags=1))  # returns the statistic and the p-value

data[u'SALES_VOLUME'] = data[u'SALES_VOLUME'].astype(float)
# Order selection: grid-search p and q and record each model's BIC.
pmax = int(len(D_data) / 10)  # rule of thumb: the order rarely exceeds length/10
qmax = int(len(D_data) / 10)  # rule of thumb: the order rarely exceeds length/10
bic_matrix = []  # BIC matrix: one row per p, one column per q
for p in range(pmax + 1):
    tmp = []
    for q in range(qmax + 1):
        try:  # some (p, q) combinations fail to fit; skip them via try
            tmp.append(ARIMA(data, (p, 1, q)).fit().bic)
        except:
            tmp.append(None)
    bic_matrix.append(tmp)

bic_matrix = pd.DataFrame(bic_matrix)  # the minimum can be located from this

p, q = bic_matrix.stack().idxmin()  # flatten with stack, then idxmin gives the position of the minimum
print(u'BIC minimal p-value and q-value is:%s、%s' % (p, q))
model = ARIMA(data, (p, 1, q)).fit()  # build the ARIMA(p, 1, q) model with the selected p and q

# Print a model report
print("************************************************************")
print(model.summary2())
print("************************************************************")
print()
print("************************************************************")
print(model.forecast(forecastnum))  # forecastnum-day forecast; returns predictions, standard errors and confidence intervals
# In[68]:

# Peek at the first rows of the training data.
train.head()

# In[69]:

# Fit an ARIMA with order (2, 0, 2) on the training data.
modelFit = ARIMA(train, order=(2, 0, 2)).fit()

# In[70]:

modelFit.summary()

# In[71]:

# 20-step forecast; [0] selects the first element of forecast()'s result,
# which is used below as the predicted values.
forcastData = modelFit.forecast(steps=20)[0]

# Score the forecast against the test data with MSE and its square root (RMSE).
meanSquareError = mean_squared_error(test, forcastData)
print('MSE: ' + str(meanSquareError))
rootMeanSquareError = np.sqrt(meanSquareError)
print('RMSE: ' + str(rootMeanSquareError))

# In[72]:

# Plot training data, actual test data and the forecast on one figure; the
# forecast is drawn against the test set's index.
plt.figure(figsize=(12, 5))
plt.plot(train.index.to_pydatetime(), train, label='training')
plt.plot(test.index.to_pydatetime(), test, label='actual')
plt.plot(test.index.to_pydatetime(), forcastData, label='forecast')
plt.legend()

# In[73]:
示例#33
0
class ModeDecomp(object):
    """Trend + seasonality forecaster for a series sampled every 60 minutes.

    Typical call order (each step stores its result on the instance):
    ``decomp`` splits the smoothed training series into trend, seasonal and
    residual parts; ``trend_model`` fits an ARIMA to the trend;
    ``predict_new`` forecasts the trend and adds the seasonal component back
    together with residual-based lower/upper bounds.
    """

    def __init__(self, dataSet, type, test_size=24):
        """Index the data by date and split it into train and test parts.

        Args:
            dataSet: DataFrame with a ``'date'`` column and the column named
                by ``type``.
            type: Name of the column to model. (Shadows the builtin; kept
                because it is part of the public signature.)
            test_size: Number of trailing rows held out as the test set.
        """
        data = dataSet.set_index('date')
        data.index = pd.to_datetime(data.index)
        self.dataSet = data
        self.test_size = test_size
        self.train_size = len(self.dataSet) - self.test_size
        self.train = self.dataSet[type][:len(self.dataSet) - test_size]
        # _diff_smooth writes into the series it is given and returns it.
        self.train = self._diff_smooth(self.train)
        self.test = self.dataSet[type][-test_size:]

    def _diff_smooth(self, dataSet):
        """Smooth abnormal jumps in ``dataSet`` in place and return it.

        A point is abnormal when its first difference lies more than 1.5
        times the inter-quartile range outside the quartiles of all first
        differences. Abnormal points exactly 60 minutes apart are grouped
        into one run, and each run is overwritten with ``np.linspace`` values
        between the sample 60 minutes before the run and the sample 60
        minutes after it.

        Args:
            dataSet: Series indexed by timestamps; it is modified in place.

        Returns:
            The same series object.
        """
        dif = dataSet.diff()  # first-difference series
        td = dif.describe()
        iqr = td['75%'] - td['25%']
        high = td['75%'] + 1.5 * iqr  # upper threshold: 1.5 IQR above Q3
        low = td['25%'] - 1.5 * iqr  # lower threshold: 1.5 IQR below Q1

        # Index labels of the points whose change exceeds a threshold.
        forbid_index = dif[(dif > high) | (dif < low)].index
        last = len(forbid_index) - 1
        i = 0
        # NOTE(review): the loop stops before the final entry, so a last
        # abnormal point that is not part of a run is never smoothed --
        # confirm whether that is intended before changing it.
        while i < last:
            n = 1  # length of the current run; most runs are a single point
            start = forbid_index[i]  # first label of the run
            while forbid_index[i + n] == start + timedelta(minutes=60 * n):
                n += 1
                if (i + n) > last:
                    break
            i += n - 1
            end = forbid_index[i]  # last label of the run
            # Fill the run evenly between the neighbouring values.
            try:
                value = np.linspace(dataSet[start - timedelta(minutes=60)],
                                    dataSet[end + timedelta(minutes=60)], n)
                dataSet[start:end] = value
            except (KeyError, ValueError, TypeError):
                # Best effort: a neighbour is missing (e.g. the run touches
                # the edge of the series) or the fill cannot be assigned;
                # leave this run untouched. Narrowed from a bare ``except``
                # so KeyboardInterrupt/SystemExit are no longer swallowed.
                pass
            i += 1
        return dataSet

    def decomp(self, freq):
        """Decompose the training series and derive residual error bounds.

        Stores ``trend``, ``seasonal`` and ``residual`` from
        ``seasonal_decompose(..., two_sided=False)`` and sets
        ``low_error`` / ``high_error`` to the residual quartiles widened by
        one inter-quartile range on each side.

        Args:
            freq: Passed through to ``seasonal_decompose`` as ``freq``.
        """
        decomposition = seasonal_decompose(self.train,
                                           freq=freq,
                                           two_sided=False)
        self.trend = decomposition.trend
        self.seasonal = decomposition.seasonal
        self.residual = decomposition.resid
        d = self.residual.describe()
        delta = d['75%'] - d['25%']
        self.low_error, self.high_error = (d['25%'] - 1 * delta,
                                           d['75%'] + 1 * delta)

    def trend_model(self, order):
        """Fit an ARIMA of the given ``order`` to the trend component.

        Drops NaN entries from ``self.trend`` in place and stores the fitted
        result as ``self.trend_model_``. Requires ``decomp`` to have run.
        """
        self.trend.dropna(inplace=True)
        self.trend_model_ = ARIMA(self.trend, order).fit(disp=-1, method='css')

    def predict_new(self):
        """Forecast ``test_size`` steps beyond the training data.

        Builds ``pred_time_index`` (60-minute steps following the last
        training timestamp), forecasts the trend, then delegates to
        ``add_season`` to assemble the final prediction and bounds.

        Returns:
            The prediction time index.
        """
        n = self.test_size
        # periods = n + 1 and [1:] drop the last training timestamp itself.
        self.pred_time_index = pd.date_range(start=self.train.index[-1],
                                             periods=n + 1,
                                             freq='60min')[1:]
        self.trend_pred = self.trend_model_.forecast(n)[0]
        return self.add_season()

    def add_season(self):
        """Add the seasonal part and error bounds to the forecast trend.

        For every timestamp in ``pred_time_index`` the seasonal part is the
        mean of the training seasonal values observed at the same time of
        day. Stores ``final_pred``, ``low_conf`` and ``high_conf`` series.

        Returns:
            ``self.pred_time_index``.
        """
        self.train_season = self.seasonal[:self.train_size]
        # Time-of-day of each seasonal sample; loop-invariant, so compute it
        # once rather than on every iteration.
        season_times = self.train_season.index.time

        values = []
        low_conf_values = []
        high_conf_values = []
        for i, t in enumerate(self.pred_time_index):
            # Mean of the seasonal values recorded at the same time of day.
            season_part = self.train_season[season_times == t.time()].mean()
            # trend + seasonality, then widened by the residual bounds
            predict = self.trend_pred[i] + season_part
            values.append(predict)
            low_conf_values.append(predict + self.low_error)
            high_conf_values.append(predict + self.high_error)

        self.final_pred = pd.Series(values,
                                    index=self.pred_time_index,
                                    name='predict')
        self.low_conf = pd.Series(low_conf_values,
                                  index=self.pred_time_index,
                                  name='low_conf')
        self.high_conf = pd.Series(high_conf_values,
                                   index=self.pred_time_index,
                                   name='high_conf')

        return self.pred_time_index
示例#34
0
文件: arima.py 项目: memoiry/2016-
# NOTE: this snippet is Python 2 (print statements).
plt.rcParams['axes.unicode_minus'] = False
# Time-series plot of the raw data.
data.plot()
plt.show()

# Autocorrelation plot of the raw data.
from statsmodels.graphics.tsaplots import plot_acf
plot_acf(data).show()

from statsmodels.tsa.stattools import adfuller as ADF

# ADF test on the raw 'value' column.
print 'ADF test result:', ADF(data['value'])

# First difference of the data, with its plot, ACF, PACF and ADF test.
D_data = data.diff().dropna()
D_data.columns = ['diff value']
D_data.plot()
plt.show()
plot_acf(D_data).show()
from statsmodels.graphics.tsaplots import plot_pacf
plot_pacf(D_data).show()
print 'diff seq ADF test result:', ADF(D_data['diff value'])

# Ljung-Box test on the differenced data (labelled a white-noise test).
from statsmodels.stats.diagnostic import acorr_ljungbox
print 'dff white noise test result:', acorr_ljungbox(D_data, lags = 1)

from statsmodels.tsa.arima_model import ARIMA


# Fit an ARIMA with the fixed order (1, 1, 1) and forecast 5*6 = 30 steps;
# the report and the forecast are computed but not stored or printed.
model = ARIMA(data, (1,1,1)).fit()
model.summary2()
model.forecast(5*6)