Exemplo n.º 1
0
def main():
    """Interactively fit an ARMA(p, q) model to the spreadsheet data and report diagnostics.

    Reads the dataset, plots it, asks the user for the AR and MA orders, fits
    the model, prints information criteria, runs the project's residual
    diagnostics helpers, and finally prints the result of ``solver`` applied
    to the predictions.

    The helpers used here (``readFile``, ``plot``, ``acf_pacf``,
    ``durbin_watson``, ``normaltest``, ``qqplot``, ``get_acf``,
    ``getStartEnd``, ``predict``, ``meanForecastErr``, ``solver``) are defined
    elsewhere in the project.
    """
    # Every backslash is escaped explicitly; the original relied on "\d" not
    # being a recognised escape sequence.
    dataset_path = 'E:\\IntegrationTesting\\Predictive-Analytics\\ARMA\\datasets\\dataset3.xlsx'
    head = 0
    sheet = 1

    # Use whichever line-reading builtin this interpreter provides.
    try:
        read_line = raw_input  # Python 2
    except NameError:
        read_line = input  # Python 3

    df = readFile(dataset_path, head, sheet)
    print("Want to plot for blocked connections? : ")
    plot(df)
    df = acf_pacf(df)

    ar_order = read_line("Enter order of AR model : ")
    ma_order = read_line("Enter order of MA model : ")

    arma_pq = sm.tsa.ARMA(df, (int(ar_order), int(ma_order))).fit()
    # Single-argument print calls produce the same text on Python 2 and 3.
    print("{0}  :Parameters".format(arma_pq.params))
    print("AIC : {0}, BIC : {1}, HQIC : {2} ".format(arma_pq.aic, arma_pq.bic, arma_pq.hqic))

    # The return values of these diagnostics are not used further.
    durbin_watson(arma_pq)
    resid = arma_pq.resid
    normaltest(resid)

    print("Printing Q-Q plot")
    qqplot(resid)
    print("Plotting acf & pacf for residual values")
    acf_pacf(resid, 2)
    # Unpacked into throwaway names so the user-supplied orders are not clobbered.
    _autocorr, _q_stat, _p_values = get_acf(resid)

    start, end = getStartEnd(df)
    predictions = predict(arma_pq, start, end)
    mean = meanForecastErr(df.blocked, predictions)
    print("Mean error in forecasting : {0}. \nNote that this error is on 1st difference and not actual blocked connections".format(mean))
    res = solver(predictions)
    print(res)
Exemplo n.º 2
0
def main():
    """Print a few scipy.stats test results, show a normal Q-Q plot, and build sample data.

    ``pd`` is expected to be available at module level.
    """
    import scipy.stats as ss

    # Normality test on ten standard-normal draws.
    normal_draws = ss.norm.rvs(size=10)
    print('正态检验', ss.normaltest(normal_draws))

    # Chi-square test on a 2x2 contingency table (second argument passed positionally as False).
    contingency = [[15, 95], [85, 5]]
    print('卡四方表格', ss.chi2_contingency(contingency, False))

    # Independent two-sample t-test; samples are drawn in the original order.
    first_sample = ss.norm.rvs(size=10)
    second_sample = ss.norm.rvs(size=20)
    print('独立分布检验', ss.ttest_ind(first_sample, second_sample))

    # One-way ANOVA F-test across three groups.
    groups = (
        [49, 50, 39, 40, 43],
        [28, 32, 30, 26, 34],
        [38, 40, 45, 42, 48],
    )
    print('F分布检验', ss.f_oneway(*groups))

    from statsmodels.graphics.api import qqplot
    from matplotlib import pyplot as plt

    # Q-Q plot of one hundred normal draws.
    qq_sample = ss.norm.rvs(size=100)
    qqplot(qq_sample)
    plt.show()

    row_one = [0.1, 0.2, 1.1, 2.4, 1.3, 0.3, 0.5]
    row_two = [0.5, 0.4, 1.2, 2.5, 1.1, 0.7, 0.1]
    s = pd.Series(row_one)
    df = pd.DataFrame([row_one, row_two])  # rows s1, s2
Exemplo n.º 3
0
def main():
    """Run a short tour of statistical tests, correlation, linear regression and PCA.

    Results are printed and two figures are shown (a normal Q-Q plot and the
    regression fit). ``pd`` is expected to be available at module level.
    """
    import scipy.stats as ss
    print(ss.normaltest(ss.norm.rvs(size=10)))  # normality test
    print(ss.chi2_contingency([[15, 95], [85, 5]], False))  # chi-square 2x2 table
    print(ss.ttest_ind(ss.norm.rvs(size=10), ss.norm.rvs(size=20)))  # independent t-test
    print(
        ss.f_oneway([49, 50, 39, 40, 43], [28, 32, 30, 26, 34],
                    [38, 40, 45, 42, 48]))  # one-way F-test
    from statsmodels.graphics.api import qqplot
    from matplotlib import pyplot as plt
    qqplot(ss.norm.rvs(size=100))  # Q-Q plot
    plt.show()

    s = pd.Series([0.1, 0.2, 1.1, 2.4, 1.3, 0.3, 0.5])
    df = pd.DataFrame([[0.1, 0.2, 1.1, 2.4, 1.3, 0.3, 0.5],
                       [0.5, 0.4, 1.2, 2.5, 1.1, 0.7, 0.1]])
    # Correlation analysis
    print(s.corr(pd.Series([0.5, 0.4, 1.2, 2.5, 1.1, 0.7, 0.1])))
    print(df.corr())

    import numpy as np
    # Regression analysis
    # The ``np.float`` alias was removed from NumPy; the builtin ``float``
    # is what it aliased, so the resulting dtype is unchanged.
    x = np.arange(10).astype(float).reshape((10, 1))
    y = x * 3 + 4 + np.random.random((10, 1))
    print(x)
    print(y)
    from sklearn.linear_model import LinearRegression
    linear_reg = LinearRegression()
    reg = linear_reg.fit(x, y)
    y_pred = reg.predict(x)
    print(reg.coef_)
    print(reg.intercept_)
    print(y.reshape(1, 10))
    print(y_pred.reshape(1, 10))
    plt.figure()
    # Observations as red stars, fitted values as a line.
    plt.plot(x.reshape(1, 10)[0], y.reshape(1, 10)[0], "r*")
    plt.plot(x.reshape(1, 10)[0], y_pred.reshape(1, 10)[0])
    plt.show()

    # PCA dimensionality reduction: two columns reduced to one component.
    df = pd.DataFrame(
        np.array([
            np.array([2.5, 0.5, 2.2, 1.9, 3.1, 2.3, 2, 1, 1.5, 1.1]),
            np.array([2.4, 0.7, 2.9, 2.2, 3, 2.7, 1.6, 1.1, 1.6, 0.9])
        ]).T)
    from sklearn.decomposition import PCA
    lower_dim = PCA(n_components=1)
    lower_dim.fit(df.values)
    print("PCA")
    print(lower_dim.explained_variance_ratio_)
    print(lower_dim.explained_variance_)
Exemplo n.º 4
0
 def drawPIT(self, data, cdf, xlabel, ylabel, title, isSave, savePath,
             isShow):
     """Draw a Q-Q plot of ``data`` against ``cdf`` with a dashed significance band.

     The plot is limited to the unit square, optionally shown (``isShow``),
     optionally written to ``savePath`` (``isSave``), and then closed.
     """
     line_width = 4
     label_size = 40
     band_style = dict(color='blueviolet', lw=line_width)

     canvas = plt.figure(figsize=(12, 12))
     axs = canvas.add_subplot(111)
     fig = qqplot(data, dist=cdf, line='45', ax=axs)

     # Offset of the two dashed lines from the 45-degree diagonal.
     deta = 1.358 / len(data)**0.5 * 2**0.5
     axs.plot([deta, 1], [0, 1 - deta],
              '--',
              label='Kolmogorov 5% significance band',
              **band_style)
     axs.plot([0, 1 - deta], [deta, 1], '--', **band_style)

     axs.set_title(title, loc="center", fontsize=label_size)
     axs.set_xlabel(xlabel, fontsize=label_size)
     axs.set_ylabel(ylabel, fontsize=label_size)
     axs.set_xlim([0, 1])
     axs.set_ylim([0, 1])
     plt.xticks(fontsize=label_size)
     plt.yticks(fontsize=label_size)
     plt.grid()
     plt.legend(fontsize=25)

     if isShow:
         plt.show()
     if isSave:
         fig.savefig(savePath, bbox_inches="tight", dpi=300)
     plt.close()
Exemplo n.º 5
0
def residue_test(residue):
    """Inspect ARIMA residuals for zero-mean, constant-variance normal behaviour.

    Shows a PACF plot and a Q-Q plot of ``residue``, prints the first 15 rows
    of an autocorrelation / Q-statistic table, and prints the result of
    ``acorr_ljungbox`` at lag 1.

    :param residue: residual series; it must expose ``.values``.
    """
    fig = plt.figure(figsize=(12, 8))
    # Only the lower panel is drawn; the ACF panel (211) is currently disabled.
    ax2 = fig.add_subplot(212)
    fig = plot_pacf(residue.values.squeeze(), lags=35, ax=ax2)
    plt.show()

    # Q-Q plot to check visually whether the residuals look normal.
    fig = plt.figure(figsize=(8, 6))
    ax = fig.add_subplot(111)
    fig = qqplot(residue, line='q', ax=ax, fit=True)
    plt.show()

    # Ljung-Box statistics over a range of lags.
    r1, q1, p1 = ACF(residue.values.squeeze(), qstat=True)
    # The lag column is sized from the Q statistics instead of a hard-coded
    # 35, so the table still builds when ACF returns a different lag count.
    lags = list(range(1, len(q1) + 1))
    tmp = np.c_[lags, r1[1:], q1, p1]
    table = pd.DataFrame(tmp, columns=['lag', 'AC', 'Q', 'Prob(>Q)'])
    print(table.set_index('lag')[:15])

    # White-noise test of the residuals.
    print('残差的白噪声检验结果为:', acorr_ljungbox(residue, lags=1))
Exemplo n.º 6
0
def valid_model(data):
    """Fit an ARMA(8, 0) model to ``data`` and display diagnostic checks.

    Shows ACF/PACF plots of ``data``, prints the Durbin-Watson statistic of
    the residuals, shows a residual Q-Q plot, and prints a table of residual
    autocorrelations with their Q statistics and p-values.

    :param data: the series to model
    :return: None
    """
    arma_mod80 = sm.tsa.ARMA(data, (8, 0)).fit()
    resid = arma_mod80.resid

    fig = plt.figure(figsize=(12, 8))
    ax1 = fig.add_subplot(211)
    fig = sm.graphics.tsa.plot_acf(data, lags=40, ax=ax1)
    ax2 = fig.add_subplot(212)
    fig = sm.graphics.tsa.plot_pacf(data, lags=40, ax=ax2)
    plt.show()

    print(sm.stats.durbin_watson(arma_mod80.resid.values))

    fig = plt.figure(figsize=(12, 8))
    ax = fig.add_subplot(111)
    fig = qqplot(resid, line='q', ax=ax, fit=True)
    plt.show()

    r, q, p = sm.tsa.acf(resid.values.squeeze(), qstat=True)
    # The number of lags returned by acf depends on its default nlags, so the
    # lag column is sized from the result rather than hard-coded to 40.
    lags = np.arange(1, len(q) + 1)
    # Kept in a separate name so the ``data`` argument is not overwritten.
    rows = np.c_[lags, r[1:], q, p]
    table = pd.DataFrame(rows, columns=['lag', 'AC', 'Q', 'Prob(>Q)'])
    print(table.set_index('lag'))
Exemplo n.º 7
0
    def check_norm_qq(self, ):
        """Print a normality test of the model residuals and show their Q-Q plot.

        Reads ``arma_mod`` from the enclosing scope rather than from ``self``.
        """
        norm = stats.normaltest(arma_mod.resid)
        # Call form of print: same output on Python 2, valid syntax on Python 3.
        print(norm)

        figure003 = plt.figure(figsize=(12, 6))
        ax5 = figure003.add_subplot(111)
        qqplot(arma_mod.resid, ax=ax5, fit=True, line='q')
        plt.show()
Exemplo n.º 8
0
def analise_model(arma_mod):
    """Plot the fitted model, print a residual normality test, and draw a residual Q-Q plot."""
    plot_model(arma_mod)

    residuals = arma_mod.resid
    normality = scipy.stats.normaltest(residuals)
    print(normality)

    # Q-Q plot on a single axis of a 12x8 figure.
    qq_axis = plt.figure(figsize=(12, 8)).add_subplot(111)
    fig = qqplot(residuals, line='q', ax=qq_axis, fit=True)
def ARMA_model(train, order):
    """Fit an ARMA model to ``train`` and report in-sample diagnostics.

    Prints the model summary, plots the in-sample prediction together with
    the training series, prints ``mean_squared_error(train, pred)``, shows a
    residual Q-Q plot, and prints the Durbin-Watson statistic of the residuals.

    :param train: training series (must support ``.plot()``)
    :param order: order passed to ``ARMA``
    :return: the fitted model result
    """
    arma_model = ARMA(train, order)
    result = arma_model.fit()
    print(result.summary())

    # In-sample prediction.
    pred = result.predict()

    pred.plot()
    train.plot()
    # NOTE: the value printed here is the mean squared error; the label text
    # is kept unchanged.
    print('标准差为{}'.format(mean_squared_error(train, pred)))

    resid = result.resid
    # Draw the Q-Q plot on an axis of the sized figure. Without ``ax`` qqplot
    # opens a figure of its own and leaves this one empty.
    fig = plt.figure(figsize=(12, 8))
    qqplot(resid, line='q', fit=True, ax=fig.add_subplot(111))
    plt.show()
    # Durbin-Watson statistic of the residuals.
    print('D-W检验值为{}'.format(durbin_watson(resid.values)))
    return result
Exemplo n.º 10
0
def _save_acf_pacf(series, filename):
    """Draw stacked ACF/PACF panels (40 lags) for ``series``, save to ``filename``, then show."""
    fig = plt.figure(figsize=(12, 8))

    ax_acf = fig.add_subplot(211)
    sm.graphics.tsa.plot_acf(series, lags=40, ax=ax_acf)
    ax_acf.xaxis.set_ticks_position('bottom')

    ax_pacf = fig.add_subplot(212)
    sm.graphics.tsa.plot_pacf(series, lags=40, ax=ax_pacf)
    ax_pacf.xaxis.set_ticks_position('bottom')

    # Lay out before saving so the saved image and the shown figure agree.
    fig.tight_layout()
    fig.savefig(filename, dpi=300)
    plt.show()


def seasonal_detect(ts, trend, seasonal, residual):
    """Analyse the components of a seasonal decomposition.

    Runs ``test_stationarity`` on the residual with missing values dropped,
    shows its Q-Q plot, then saves and shows ACF/PACF figures for the
    original series, the trend and the seasonal component
    (``ts_aacf_pacf.jpg``, ``trend_acf_pacf.jpg``, ``season_acf_pacf.jpg``).

    :param ts: original time series
    :param trend: trend component
    :param seasonal: seasonal component
    :param residual: residual component; it is no longer modified in place
    """
    # Work on a cleaned copy instead of mutating the caller's series.
    ts_decompose = residual.dropna()
    test_stationarity(ts_decompose)
    print('---------------------------------------------')

    # The original assigned ``plt.figure`` without calling it and then called
    # the non-existent ``Figure.title``; create the axis explicitly instead.
    fig = plt.figure()
    ax = fig.add_subplot(111)
    qqplot(ts_decompose, line='q', fit=True, ax=ax)
    ax.set_title('qqplot of residual')
    plt.show()

    # ts
    _save_acf_pacf(ts, 'ts_aacf_pacf.jpg')
    print('-----------------------------------------------')
    # trend
    _save_acf_pacf(trend, 'trend_acf_pacf.jpg')
    print('-----------------------------------------------')
    # seasonal
    _save_acf_pacf(seasonal, 'season_acf_pacf.jpg')
Exemplo n.º 11
0
def arima_handler(dta, start, end):
    """Fit AR(2) and AR(3) models to ``dta`` and forecast with AR(2) when it has the lower AIC.

    :param dta: observations, converted to a pandas Series
    :param start: first date label, passed to ``dates_from_range``; must be
        convertible with ``int``
    :param end: last date label, same requirements as ``start``
    :return: the dynamic prediction of the AR(2) model when its AIC is lower
        than the AR(3) model's AIC; otherwise ``None`` (no diagnostics and no
        forecast are produced in that case)
    """
    # ``pd.TimeSeries`` was an alias of ``pd.Series`` and no longer exists.
    dta = pd.Series(dta)
    dta.index = pd.Index(sm.tsa.datetools.dates_from_range(start, end))
    dta.plot(figsize=(12, 8))

    fig = plt.figure(figsize=(12, 8))
    ax1 = fig.add_subplot(211)
    fig = sm.graphics.tsa.plot_acf(dta.values.squeeze(), lags=40, ax=ax1)
    ax2 = fig.add_subplot(212)
    fig = sm.graphics.tsa.plot_pacf(dta, lags=40, ax=ax2)

    arma_mod20 = sm.tsa.ARMA(dta, (2, 0)).fit()
    arma_mod30 = sm.tsa.ARMA(dta, (3, 0)).fit()

    print(arma_mod20.aic, arma_mod20.bic, arma_mod20.hqic)
    print(arma_mod30.aic, arma_mod30.bic, arma_mod30.hqic)

    if arma_mod20.aic >= arma_mod30.aic:
        # AR(3) is at least as good: as before, nothing further happens.
        return None

    # The Durbin-Watson and normality results are computed but not used.
    sm.stats.durbin_watson(arma_mod20.resid.values)
    fig = plt.figure(figsize=(12, 8))
    ax = fig.add_subplot(111)
    ax = arma_mod20.resid.plot(ax=ax)

    resid = arma_mod20.resid
    stats.normaltest(resid)

    fig = plt.figure(figsize=(12, 8))
    ax = fig.add_subplot(111)
    fig = qqplot(resid, line='q', ax=ax, fit=True)

    fig = plt.figure(figsize=(12, 8))
    ax1 = fig.add_subplot(211)
    fig = sm.graphics.tsa.plot_acf(resid.values.squeeze(), lags=40, ax=ax1)
    ax2 = fig.add_subplot(212)
    fig = sm.graphics.tsa.plot_pacf(resid, lags=40, ax=ax2)

    r, q, p = sm.tsa.acf(resid.values.squeeze(), qstat=True)
    # Size the lag column from the result; the number of lags acf returns is
    # not always 40.
    lags = np.arange(1, len(q) + 1)
    data = np.c_[lags, r[1:], q, p]
    table = pd.DataFrame(data, columns=['lag', "AC", "Q", "Prob(>Q)"])

    # ``string.atoi`` only exists on Python 2; ``int`` parses the same input.
    predict_sunspots = arma_mod20.predict(str(int(start) + 360),
                                          str(int(end) + 5),
                                          dynamic=True)
    return predict_sunspots
Exemplo n.º 12
0
def arima_handler(dta, start, end):
    """Fit AR(2) and AR(3) models to ``dta`` and forecast with AR(2) when it has the lower AIC.

    :param dta: observations, converted to a pandas Series
    :param start: first date label, passed to ``dates_from_range``; must be
        convertible with ``int``
    :param end: last date label, same requirements as ``start``
    :return: the dynamic prediction of the AR(2) model when its AIC is lower
        than the AR(3) model's AIC; otherwise ``None`` (no diagnostics and no
        forecast are produced in that case)
    """
    # ``pd.TimeSeries`` was an alias of ``pd.Series`` and no longer exists.
    dta = pd.Series(dta)
    dta.index = pd.Index(sm.tsa.datetools.dates_from_range(start, end))
    dta.plot(figsize=(12, 8))

    fig = plt.figure(figsize=(12, 8))
    ax1 = fig.add_subplot(211)
    fig = sm.graphics.tsa.plot_acf(dta.values.squeeze(), lags=40, ax=ax1)
    ax2 = fig.add_subplot(212)
    fig = sm.graphics.tsa.plot_pacf(dta, lags=40, ax=ax2)

    arma_mod20 = sm.tsa.ARMA(dta, (2, 0)).fit()
    arma_mod30 = sm.tsa.ARMA(dta, (3, 0)).fit()

    print(arma_mod20.aic, arma_mod20.bic, arma_mod20.hqic)
    print(arma_mod30.aic, arma_mod30.bic, arma_mod30.hqic)

    if arma_mod20.aic >= arma_mod30.aic:
        # AR(3) is at least as good: as before, nothing further happens.
        return None

    # The Durbin-Watson and normality results are computed but not used.
    sm.stats.durbin_watson(arma_mod20.resid.values)
    fig = plt.figure(figsize=(12, 8))
    ax = fig.add_subplot(111)
    ax = arma_mod20.resid.plot(ax=ax)

    resid = arma_mod20.resid
    stats.normaltest(resid)

    fig = plt.figure(figsize=(12, 8))
    ax = fig.add_subplot(111)
    fig = qqplot(resid, line='q', ax=ax, fit=True)

    fig = plt.figure(figsize=(12, 8))
    ax1 = fig.add_subplot(211)
    fig = sm.graphics.tsa.plot_acf(resid.values.squeeze(), lags=40, ax=ax1)
    ax2 = fig.add_subplot(212)
    fig = sm.graphics.tsa.plot_pacf(resid, lags=40, ax=ax2)

    r, q, p = sm.tsa.acf(resid.values.squeeze(), qstat=True)
    # Size the lag column from the result; the number of lags acf returns is
    # not always 40.
    lags = np.arange(1, len(q) + 1)
    data = np.c_[lags, r[1:], q, p]
    table = pd.DataFrame(data, columns=['lag', "AC", "Q", "Prob(>Q)"])

    # ``string.atoi`` only exists on Python 2; ``int`` parses the same input.
    predict_sunspots = arma_mod20.predict(str(int(start) + 360),
                                          str(int(end) + 5),
                                          dynamic=True)
    return predict_sunspots
Exemplo n.º 13
0
 def processData(self,p,q):
     """Fit an ARMA(p, q) model to ``self.data``, print its criteria, and show residual plots.

     The fitted result is stored on ``self.arma_mod``. One figure with three
     stacked panels is shown: residual ACF, residual PACF and a Q-Q plot.
     """
     self.arma_mod = sm.tsa.ARMA(self.data, (p, q)).fit()

     # Print each information criterion in turn.
     for label, attribute in (("AIC:", "aic"), ("BIC:", "bic"), ("HQIC:", "hqic")):
         print(label, str(getattr(self.arma_mod, attribute)))

     residuals = self.arma_mod.resid
     print("DW value:", sm.stats.durbin_watson(residuals.values))

     canvas = figure(figsize=(12, 8))
     acf_axis = canvas.add_subplot(311)
     canvas = sm.graphics.tsa.plot_acf(residuals.values.squeeze(), lags=40, ax=acf_axis)
     pacf_axis = canvas.add_subplot(312)
     canvas = sm.graphics.tsa.plot_pacf(residuals, lags=40, ax=pacf_axis)
     qq_axis = canvas.add_subplot(313)
     canvas = qqplot(residuals, line='q', ax=qq_axis, fit=True)
     show()
Exemplo n.º 14
0
def testModelFit(arma_mod30, dta):
    """Check whether the residuals of a fitted model behave as theory expects.

    Saves a residuals-versus-time plot and a Q-Q plot plus histogram under
    ``config.plot_dir``, prints the normality test result, delegates the
    residual ACF/PACF figure to ``plotACFAndPACF``, and prints a table of
    residual autocorrelations with Q statistics and p-values.

    :param arma_mod30: fitted model exposing ``.resid``
    :param dta: not used by this function
    """
    residuals = arma_mod30.resid
    # The Durbin-Watson statistic is computed here but its value is not used.
    sm.stats.durbin_watson(residuals.values)
    # NOTE: Durbin Watson Test Statistic approximately equal to 2*(1-r)
    #       where r is the sample autocorrelation of the residuals.
    #       Thus, for r == 0, indicating no serial correlation,
    #       the test statistic equals 2. This statistic will always be
    #       between 0 and 4. The closer to 0 the statistic, the more evidence
    #       for positive serial correlation. The closer to 4, the more evidence
    #       for negative serial correlation.

    # Plot the residuals over time to spot poorly explained periods.
    fig = plt.figure(figsize=(12, 8))
    ax = fig.add_subplot(111)
    ax = arma_mod30.resid.plot(ax=ax)

    plt.savefig(config.plot_dir + 'ARIMAX_test_residualsVsTime.png',
                bbox_inches='tight')

    # Test whether the residuals differ from a normal distribution.
    k2, p = stats.normaltest(residuals)
    print("residuals skew (k2):" + str(k2) +
          " fit w/ normal dist (p-value): " + str(p))

    # Q-Q plot on top, histogram below.
    fig = plt.figure(figsize=(12, 8))
    ax = fig.add_subplot(211)
    fig = qqplot(residuals, line='q', ax=ax, fit=True)

    ax2 = fig.add_subplot(212)
    plt.hist(residuals, bins=25)
    plt.title('Histogram of standardized deviance residuals')
    plt.savefig(config.plot_dir + 'ARIMAX_test_residualsNormality.png',
                bbox_inches='tight')

    plt.clf()
    # ACF/PACF plots of the residuals.
    plotACFAndPACF(residuals, 'residualsACFAndPACF.png')

    r, q, p = sm.tsa.acf(residuals.values.squeeze(), qstat=True)
    # Size the lag column from the result; the number of lags acf returns is
    # not always 40.
    lags = np.arange(1, len(q) + 1)
    data = np.c_[lags, r[1:], q, p]
    table = pandas.DataFrame(data, columns=['lag', "AC", "Q", "Prob(>Q)"])
    print(table.set_index('lag'))
Exemplo n.º 15
0
def ARIMA_modeling(data, order, test):
    """Fit an ARIMA model, check its residuals and compare a forecast with ``test``.

    Draws a residual Q-Q plot, prints the ``acorr_ljungbox`` result at lag 1,
    forecasts ``len(test)`` steps, plots the forecast and its bounds next to
    the test data, and prints error measures.

    :param data: training series (``.values`` is passed to ``ARIMA``)
    :param order: order passed to ``ARIMA``
    :param test: held-out observations to compare the forecast against
    """
    tempModel = ARIMA(data.values, order).fit()

    fig = plt.figure(figsize=(12, 8))
    ax = fig.add_subplot(111)
    print("QQ plot of residuals (should be like a straight line)")
    fig = qqplot(tempModel.resid, line='q', ax=ax, fit=True)
    print("######################")
    noiseRes = acorr_ljungbox(tempModel.resid, lags=1)
    print("result of residual's white noice testing (should be very large)")
    print('stat                  | p-value')
    for x in noiseRes:
        print(x, '|')
    print("######################")

    # Forecast once and reuse the result instead of calling forecast twice.
    forecast = tempModel.forecast(steps=len(test))
    predicts = forecast[0]
    pred_CI = forecast[2]
    low = [bounds[0] for bounds in pred_CI]
    high = [bounds[1] for bounds in pred_CI]

    comp = pd.DataFrame()
    comp['original'] = test
    comp['predict'] = predicts
    comp['low'] = low
    comp['high'] = high
    comp.plot()

    print("######################")
    # NOTE: this is the square root of the mean squared error; the label text
    # is kept unchanged.
    rms = sqrt(mean_squared_error(test, predicts))
    print('mean squared error: ', rms)
    print("######################")

    # Absolute percentage error per observation.
    q = (abs(comp['original'] - comp['predict']) / comp['original']) * 100
    print(q)
    print('average MAPE: ', np.mean(abs(q)), '%')
    print('If MAPE is "Inf", it because the test data contains 0')
    print("######################")
    # Symmetric variant: error relative to the mean of actual and predicted.
    q1 = (abs(comp['original'] - comp['predict']) /
          ((comp['original'] + comp['predict']) / 2)) * 100
    print(q1)
    print('average Symmetric MAPE: ', np.mean(abs(q1)), '%')
Exemplo n.º 16
0
def testModelFit(arma_mod30, dta):
    """Check whether the residuals of a fitted model behave as theory expects.

    Saves a residuals-versus-time plot and a Q-Q plot plus histogram under
    ``FIG_DIR``, prints the normality test result, delegates the residual
    ACF/PACF figure to ``plotACFAndPACF``, and prints a table of residual
    autocorrelations with Q statistics and p-values.

    :param arma_mod30: fitted model exposing ``.resid``
    :param dta: not used by this function
    """
    residuals = arma_mod30.resid
    # The Durbin-Watson statistic is computed here but its value is not used.
    sm.stats.durbin_watson(residuals.values)
    # NOTE: Durbin Watson Test Statistic approximately equal to 2*(1-r)
    #       where r is the sample autocorrelation of the residuals.
    #       Thus, for r == 0, indicating no serial correlation,
    #       the test statistic equals 2. This statistic will always be
    #       between 0 and 4. The closer to 0 the statistic, the more evidence
    #       for positive serial correlation. The closer to 4, the more evidence
    #       for negative serial correlation.

    # Plot the residuals over time to spot poorly explained periods.
    fig = plt.figure(figsize=(12, 8))
    ax = fig.add_subplot(111)
    ax = arma_mod30.resid.plot(ax=ax)

    plt.savefig(FIG_DIR + 'residualsVsTime.png', bbox_inches='tight')

    # Test whether the residuals differ from a normal distribution.
    k2, p = stats.normaltest(residuals)
    print("residuals skew (k2):" + str(k2) +
          " fit w/ normal dist (p-value): " + str(p))

    # Q-Q plot on top, histogram below.
    fig = plt.figure(figsize=(12, 8))
    ax = fig.add_subplot(211)
    fig = qqplot(residuals, line='q', ax=ax, fit=True)

    ax2 = fig.add_subplot(212)
    plt.hist(residuals, bins=25)
    plt.title('Histogram of standardized deviance residuals')
    plt.savefig(FIG_DIR + 'residualsNormality.png', bbox_inches='tight')

    # ACF/PACF plots of the residuals.
    plotACFAndPACF(residuals, 'residualsACFAndPACF.png')

    r, q, p = sm.tsa.acf(residuals.values.squeeze(), qstat=True)
    # Size the lag column from the result; the number of lags acf returns is
    # not always 40.
    lags = np.arange(1, len(q) + 1)
    data = np.c_[lags, r[1:], q, p]
    table = pandas.DataFrame(data, columns=['lag', "AC", "Q", "Prob(>Q)"])
    # Call form of print: same output on Python 2, valid syntax on Python 3.
    print(table.set_index('lag'))
def residual_test(residual, lags=31):
    """Show ACF/PACF and Q-Q plots of ``residual``, then run ``ljung_box_test`` on it.

    :param residual: residual series exposing ``.values``
    :param lags: number of lags drawn in the ACF and PACF panels
    """
    # Correlograms: ACF on top, PACF below, on a white-faced figure.
    correlogram = plt.figure(facecolor='white')
    upper = correlogram.add_subplot(211)
    correlogram = plot_acf(residual.values.squeeze(), lags=lags, ax=upper)
    lower = correlogram.add_subplot(212)
    correlogram = plot_pacf(residual, lags=lags, ax=lower)
    plt.show()

    # Q-Q plot of the residuals.
    qq_axis = plt.figure(figsize=(12, 8)).add_subplot(111)
    qq_figure = qqplot(residual, line='q', ax=qq_axis, fit=True)
    plt.show()

    ljung_box_test(residual)
Exemplo n.º 18
0
def tsdiag(resid):
    """Show model-check plots for ``resid``: ACF, PACF and Q-Q plot stacked in one figure.

    :param resid: residual series exposing ``.values``
    :return: None
    """
    canvas = plt.figure(figsize=(12, 8))

    acf_axis = canvas.add_subplot(311)
    canvas = sm.graphics.tsa.plot_acf(resid.values.squeeze(), lags=40, ax=acf_axis)

    pacf_axis = canvas.add_subplot(312)
    canvas = sm.graphics.tsa.plot_pacf(resid, lags=40, ax=pacf_axis)

    qq_axis = canvas.add_subplot(313)
    canvas = qqplot(resid, line='q', ax=qq_axis, fit=True)

    plt.show()
Exemplo n.º 19
0
	def rosen(x):
		"""Return the sum of 100*(x[i+1] - x[i]**2)**2 + (1 - x[i])**2 over consecutive pairs of x."""
		# The body must be indented under the def; it was at the def's own level.
		return sum(100.0*(x[1:]-x[:-1]**2.0)**2.0 + (1-x[:-1])**2.0)
	# Minimise rosen from a fixed starting point with the Nelder-Mead method.
	x0 = np.array([1.3, 0.7, 0.8, 1.9, 1.2])
	res = minimize(rosen, x0, method='nelder-mead',
	options={'xtol': 1e-8, 'disp': True})
	print(res.x)
	
	
	# Binomial GLM on the star98 dataset using a formula.
	import statsmodels.api as sm
	import statsmodels.formula.api as smf
	star98 = sm.datasets.star98.load_pandas().data
	formula = 'SUCCESS ~ LOWINC + PERASIAN + PERBLACK + PERHISP + PCTCHRT + \
	PCTYRRND + PERMINTE*AVYRSEXP*AVSALK + PERSPENK*PTRATIO*PCTAF'
	dta = star98[['NABOVE', 'NBELOW', 'LOWINC', 'PERASIAN', 'PERBLACK', 'PERHISP',
	'PCTCHRT', 'PCTYRRND', 'PERMINTE', 'AVYRSEXP', 'AVSALK',
	'PERSPENK', 'PTRATIO', 'PCTAF']].copy()
	# SUCCESS is the share NABOVE / (NABOVE + NBELOW); both count columns are removed.
	endog = dta['NABOVE'] / (dta['NABOVE'] + dta.pop('NBELOW'))
	del dta['NABOVE']
	dta['SUCCESS'] = endog
	mod1 = smf.glm(formula=formula, data=dta, family=sm.families.Binomial()).fit()
	print(mod1.summary())
	
	import numpy as np
	from scipy import stats
	import pandas as pd
	import matplotlib.pyplot as plt
	import statsmodels.api as sm
	from statsmodels.graphics.api import qqplot
	print(sm.datasets.sunspots.NOTE)
	
	# NOTE(review): ``dta`` is still the star98 frame built above; the code that
	# loads the sunspots series appears to be missing from this excerpt.
	arma_mod20 = sm.tsa.ARMA(dta, (2,0)).fit(disp=False)
	print(arma_mod20.params)
	arma_mod30 = sm.tsa.ARMA(dta, (3,0)).fit(disp=False)
	resid = arma_mod30.resid
	stats.normaltest(resid)
	fig = plt.figure(figsize=(12,8))
	ax = fig.add_subplot(111)
	fig = qqplot(resid, line='q', ax=ax, fit=True)
	predict_sunspots = arma_mod30.predict('1990', '2012', dynamic=True)
	print(predict_sunspots)
	fig, ax = plt.subplots(figsize=(12, 8))
	# ``.ix`` was removed from pandas; ``.loc`` performs the same label slice.
	ax = dta.loc['1950':].plot(ax=ax)
	fig = arma_mod30.plot_predict('1990', '2012', dynamic=True, ax=ax, plot_insample=False)
Exemplo n.º 20
0
def model_detect(result):
    """Model check: show residual ACF/PACF plots, print Durbin-Watson, show a residual Q-Q plot."""
    import statsmodels.api as sm

    correlogram = plt.figure(figsize=(12, 8))
    acf_axis = correlogram.add_subplot(211)
    correlogram = sm.graphics.tsa.plot_acf(
        result.resid.values.squeeze(), lags=40, ax=acf_axis)
    pacf_axis = correlogram.add_subplot(212)
    correlogram = sm.graphics.tsa.plot_pacf(result.resid, lags=40, ax=pacf_axis)
    plt.show()

    # Durbin-Watson statistic of the residuals.
    dw_statistic = sm.stats.durbin_watson(result.resid.values)
    print(dw_statistic)

    residuals = result.resid
    qq_axis = plt.figure(figsize=(12, 8)).add_subplot(111)
    qq_figure = qqplot(residuals, line='q', ax=qq_axis, fit=True)
    plt.show()
Exemplo n.º 21
0
def check_resid_wd_acf_pacf_qq(model):
    """Run residual checks on ``model``: normality, Durbin-Watson, Q-Q plot, Q table, ACF/PACF.

    Prints the normality test and the Durbin-Watson statistic, draws a
    residual Q-Q plot, prints a table of residual autocorrelations with Q
    statistics and p-values, and shows ACF/PACF plots.

    :param model: fitted model exposing ``.resid``
    """
    resid = model.resid
    print(stats.normaltest(resid))
    print(sm.stats.durbin_watson(resid))

    fig = plt.figure(figsize=(12, 4))
    ax = fig.add_subplot(111)
    fig = qqplot(resid, line='q', ax=ax, fit=True)

    r, q, p = sm.tsa.acf(resid.values.squeeze(), qstat=True)
    # Size the lag column from the result; the number of lags acf returns is
    # not always 40.
    lags = np.arange(1, len(q) + 1)
    data = np.c_[lags, r[1:], q, p]
    table = pd.DataFrame(data, columns=['lag', "AC", "Q", "Prob(>Q)"])
    print(table.set_index('lag'))

    fig = plt.figure(figsize=(12, 8))
    ax1 = fig.add_subplot(211)
    fig = plot_acf(resid, lags=40, ax=ax1)
    ax2 = fig.add_subplot(212)
    fig = plot_pacf(resid, lags=40, ax=ax2)
    plt.show()
Exemplo n.º 22
0
 def drawPIT(data,
             cdf=stats.uniform,
             xlabel="uniform distribution",
             ylabel="PIT",
             title="",
             isShow=False,
             isSave=False,
             savePath=None):
     """Draw a Q-Q plot of ``data`` against ``cdf`` with a dashed significance band.

     :param data: sample to plot; ``len(data)`` sets the band offset
     :param cdf: distribution passed to ``qqplot`` as ``dist``
     :param xlabel: x-axis label
     :param ylabel: y-axis label
     :param title: plot title
     :param isShow: show the figure when true
     :param isSave: write the figure to ``savePath`` when true
     :param savePath: output file; its directory is created when missing
     :raises ValueError: if ``isSave`` is true and ``savePath`` is ``None``
     """
     # Fail before any figure is created rather than after plotting.
     if isSave and savePath is None:
         raise ValueError("savePath is required when isSave is True")

     lw = 4
     fontsize = 40
     fig = plt.figure(figsize=(16, 16))
     axs = fig.add_subplot(111)
     fig = qqplot(data, dist=cdf, line='45', ax=axs)
     # Offset of the two dashed lines from the 45-degree diagonal.
     deta = 1.358 / (len(data))**0.5 * (2**0.5)
     axs.plot([deta, 1], [0, 1 - deta],
              '--',
              color='blueviolet',
              lw=lw,
              label='Kolmogorov 5% significance band')
     axs.plot([0, 1 - deta], [deta, 1], '--', color='blueviolet', lw=lw)
     axs.set_title(title, loc="center", fontsize=fontsize)
     axs.set_xlabel(xlabel, fontsize=fontsize)
     axs.set_ylabel(ylabel, fontsize=fontsize)
     axs.set_xlim([0, 1])
     axs.set_ylim([0, 1])
     plt.xticks(fontsize=fontsize)
     plt.yticks(fontsize=fontsize)
     plt.grid()
     plt.legend(fontsize=25)
     if isShow:
         plt.show()
     # Only touch the filesystem when a path was supplied. The default
     # savePath=None used to raise TypeError in os.path.dirname.
     if savePath is not None:
         dirPath = os.path.dirname(savePath)
         # A bare file name has an empty directory part; makedirs('') fails.
         if dirPath and not os.path.exists(dirPath):
             os.makedirs(dirPath)
     if isSave:
         fig.savefig(savePath, bbox_inches="tight", dpi=300)
     plt.close()
    print(u'模型ARIMA(%s,1,%s)符合白噪声检验' % (p, q))

# Autocorrelation plots of the residuals
fig = plt.figure(figsize=(12, 8))
ax1 = fig.add_subplot(211)
fig = plot_acf(pred_error, ax=ax1)
ax2 = fig.add_subplot(212)
fig = plot_pacf(pred_error, ax=ax2)
fig.show()

# Durbin-Watson test
print(sm.stats.durbin_watson(pred_error))

# Q-Q plot, drawn on an axis of the sized figure. Without ``ax`` qqplot opens
# a figure of its own and leaves the 12x8 one empty.
fig = plt.figure(figsize=(12, 8))
fig = qqplot(pred_error, line='q', fit=True, ax=fig.add_subplot(111))
fig.show()

###不同差分次数的精度
##print(ARIMA(dta, (p,0,q)).fit().bic)
##print(ARIMA(dta, (p,1,q)).fit().bic)
##
##print(ARIMA(dta, (p,0,q)).fit().aic)
##print(ARIMA(dta, (p,1,q)).fit().aic)
###print(ARIMA(dta, (p,2,q)).fit().aic) #MA系数不可逆
##
###差分比较
##fig1 = plt.figure(figsize=(8,7))
##ax1= fig1.add_subplot(211)
##diff1 = dta.diff(1)
##diff1.plot(ax=ax1)
Exemplo n.º 24
0
def armia(id, product, predicted):
    """Fit an ARMA model to one product's sales and store its prediction.

    Relies on ``visible``, ``search_mode`` and ``grid_search`` from the
    enclosing module.

    :param id: product identifier; ``str(id)`` is the key into ``product``
        and ``predicted``
    :param product: mapping from product key to its sales sequence
    :param predicted: mapping that receives the prediction under ``str(id)``
    :return: ``True`` when the first fit succeeded, ``False`` when the
        fallback order from ``grid_search(data, "error")`` had to be used
    """
    flag = True  # set to False when the first fit fails
    # 1. data preprocessing
    sale_per_product = product[str(id)]
    # ``np.float`` was removed from NumPy; builtin ``float`` is what it aliased.
    data = np.array(sale_per_product, dtype=float)
    src = data
    data = pd.Series(data)
    data.index = pd.Index(np.arange(118))
    data.plot(figsize=(12, 8))
    plt.title("product_" + str(id))
    if visible:
        plt.show()
    # 2. first difference of the series
    fig = plt.figure(figsize=(12, 8))
    ax1 = fig.add_subplot(111)
    diff = data.diff(1)
    diff.plot(ax=ax1)
    plt.title("diff" + str(id))
    if visible:
        plt.show()
    # 3. find the proper p and q
    # 3.1 model selection
    p_, d_, q_ = grid_search(data, search_mode)
    print((p_, d_, q_))
    # The log file is rewritten on every call and holds only the current id.
    with open("log.txt", 'w+') as f:
        f.writelines([str(id)])
    try:
        arma_mod = sm.tsa.ARMA(data, order=(p_, d_, q_)).fit()
    except Exception:
        # Narrowed from a bare ``except`` so that KeyboardInterrupt and
        # SystemExit are no longer swallowed.
        p_, d_, q_ = grid_search(data, "error")
        arma_mod = sm.tsa.ARMA(data, order=(p_, d_, q_)).fit()
        flag = False

    # 3.2 check the residuals
    resid = arma_mod.resid

    # 3.3 Q-Q plot to judge normality
    fig = plt.figure(figsize=(12, 8))
    ax = fig.add_subplot(111)
    fig = qqplot(resid, line='q', ax=ax, fit=True)

    # 3.5 residual autocorrelation table
    r, q, p = sm.tsa.acf(resid.values.squeeze(), qstat=True)
    # Size the lag column from the result; the number of lags acf returns is
    # not always 40.
    lags = np.arange(1, len(q) + 1)
    rdata = np.c_[lags, r[1:], q, p]
    table = pd.DataFrame(rdata, columns=['lag', "AC", "Q", "Prob(>Q)"])

    predict_dta = arma_mod.predict(117, 144, dynamic=True)
    predicted[str(id)] = np.array(predict_dta)
    print(predict_dta)
    print((p_, d_, q_))

    plt.subplot(111)
    # Renamed from ``all`` so the builtin is not shadowed.
    full_series = np.concatenate((src, np.array(predict_dta).astype(int)))
    plt.plot(np.arange(full_series.size), full_series)
    plt.title("whole sale of the product")
    if visible:
        plt.show()
    return flag
    '''''' '''
    # 3.prediction
    fig, ax = plt.subplots(figsize=(12, 8))
    ax = data.ix[0:].plot(ax=ax)

    fig = arma_mod.plot_predict(117, 144, dynamic=True, ax=ax, plot_insample=False)
    plt.legend([ax,fig],["previous sale","predicted sale"],loc='upper right')
    plt.title('whole sale of the product')
    #plt.show()
    ''' ''''''
    # NOTE(review): this fragment starts mid-function; ``fig``, ``ax1``,
    # ``stock_data`` and ``test_data`` are defined in code not shown here.
    plt.ylabel('Correlation')
    plt.title('Log(Return) Autocorrelation')
    # ACF of the log returns on the first axis, PACF on a second axis below.
    sm.graphics.tsa.plot_acf(stock_data['log_ret'].values.squeeze(), lags=60, ax=ax1)
    ax2 = fig.add_subplot(212)
    plt.title('Log(Return) Pacf')
    plt.xlabel('Lag (Business Days)')
    plt.ylabel('Correlation')
    sm.graphics.tsa.plot_pacf(stock_data['log_ret'], lags=60, ax=ax2)
    plt.figtext(0.5, 0.95,'Daily Return Correllations by Date')
    plt.savefig('stock_logregcorr.png')
    
    # NOTE(review): execution stops here, so everything below is unreachable.
    sys.exit()
    
    # Q-Q plot of the log returns, saved to disk.
    fig = plt.figure(figsize=(12,8))
    plt.title('qq plot of the log(return)')
    qqplot(stock_data['log_ret'], line='q', ax=plt.gca(), fit=True)
    plt.savefig('logrec_qq.png')
    
    train_arr = get_pred_arr(stock_data)
    test_arr =get_pred_arr(test_data)     

    # Fit the scaler on the training array only, then apply it to both arrays.
    scaler = preprocessing.StandardScaler().fit(train_arr)
    sctrain_arr = scaler.transform(train_arr) 
    sctest_arr = scaler.transform(test_arr) 

    # NOTE(review): ``sklearn.cross_validation`` is presumably unavailable in
    # current scikit-learn releases (``sklearn.model_selection``) - confirm.
    from sklearn.cross_validation import train_test_split
    stock_train, stock_cv, true_train, true_cv = train_test_split(sctrain_arr, stock_data['log_ret'].fillna(method='backfill').values, test_size=0.33, random_state=42)

    # reweight outliers
    # Weight = 5 * |standardised target| + 1.
    weighter_scale =  preprocessing.StandardScaler().fit(true_train)
    train_weight_outliers = 5.0*np.abs(weighter_scale.transform(true_train))+1
plt.show()

# In[6]:

# ACF and PACF plots of the residuals suggest they are essentially white noise.

# The Durbin-Watson test is the most common check for autocorrelation, but it
# only detects first-order autocorrelation.
# DW=4 <=> rho=-1: negative autocorrelation
# DW=2 <=> rho=0: no (first-order) autocorrelation
# So a DW value close to 0 or 4 indicates autocorrelation, while a value close
# to 2 indicates no (first-order) autocorrelation.

print(sm.stats.durbin_watson(ar10.resid.values))
# Check normality with a Q-Q plot, which shows visually whether a sample comes
# from a given distribution or whether two samples share a distribution family.
print(stats.normaltest(resid))
# Draw on an axis of the sized figure. Without ``ax`` qqplot opens a figure
# of its own and leaves the 12x8 one empty.
fig = plt.figure(figsize=(12, 8))
fig = qqplot(resid, line='q', fit=True, ax=fig.add_subplot(111))
plt.show()
# The result is roughly consistent with a normal distribution.

# In[7]:

predict_dta = ar10.forecast(steps=5)
import datetime
# Plot predictions for the window from day 190 to day 220 after 2017-01-01.
fig = ar10.plot_predict(
    pd.to_datetime('2017-01-01') + datetime.timedelta(days=190),
    pd.to_datetime('2017-01-01') + datetime.timedelta(days=220),
    dynamic=False,
    plot_insample=True)
plt.show()

# In[8]:
Exemplo n.º 27
0
# Residuals of the fitted model over time (``fig`` comes from earlier code).
ax = fig.add_subplot(111)
ax = arma_mod30.resid.plot(ax=ax)

# <codecell>

resid = arma_mod30.resid

# <codecell>

stats.normaltest(resid)

# <codecell>

# Q-Q plot of the residuals.
fig = plt.figure(figsize=(12,8))
ax = fig.add_subplot(111)
fig = qqplot(resid, line='q', ax=ax, fit=True)

# <codecell>

# ACF and PACF of the residuals.
fig = plt.figure(figsize=(12,8))
ax1 = fig.add_subplot(211)
fig = sm.graphics.tsa.plot_acf(resid.values.squeeze(), lags=40, ax=ax1)
ax2 = fig.add_subplot(212)
fig = sm.graphics.tsa.plot_pacf(resid, lags=40, ax=ax2)

# <codecell>

r,q,p = sm.tsa.acf(resid.values.squeeze(), qstat=True)
# Size the lag column from the result; the number of lags acf returns is not
# always 40.
data = np.c_[np.arange(1, len(q) + 1), r[1:], q, p]
table = pandas.DataFrame(data, columns=['lag', "AC", "Q", "Prob(>Q)"])
# Call form of print: same output on Python 2, valid syntax on Python 3.
print(table.set_index('lag'))
Exemplo n.º 28
0
# In[74]:

# Normality test on the residuals of the model currently held in ``result``
# (fitted in an earlier cell that is not shown here).
z, p = stats.normaltest(result.resid.values)

# In[75]:

# Bare expression: displays the p-value in a notebook, no effect in a script.
p

# In[76]:

# Bare expression: displays the fitted parameters in a notebook.
result.params

# In[77]:

# Q-Q plot of the residuals, saved as a PDF.
fig, ax = plt.subplots(figsize=(8, 4))
smg.qqplot(result.resid, ax=ax)

fig.tight_layout()
fig.savefig("ch14-qqplot-model-1.pdf")

# In[78]:

# New OLS model from a formula that adds the x1*x2 term.
model = smf.ols("y ~ x1 + x2 + x1*x2", data)

# In[79]:

# Rebinds ``result``; the cells above used the previous fit.
result = model.fit()

# In[80]:

print(result.summary())
    # NOTE(review): this fragment starts mid-function; ``fig``, ``ax1``,
    # ``stock_data`` and ``test_data`` are defined in code not shown here.
    # ACF of the log returns on the first axis, PACF on a second axis below.
    sm.graphics.tsa.plot_acf(stock_data['log_ret'].values.squeeze(),
                             lags=60,
                             ax=ax1)
    ax2 = fig.add_subplot(212)
    plt.title('Log(Return) Pacf')
    plt.xlabel('Lag (Business Days)')
    plt.ylabel('Correlation')
    sm.graphics.tsa.plot_pacf(stock_data['log_ret'], lags=60, ax=ax2)
    plt.figtext(0.5, 0.95, 'Daily Return Correllations by Date')
    plt.savefig('stock_logregcorr.png')

    # NOTE(review): execution stops here, so everything below is unreachable.
    sys.exit()

    # Q-Q plot of the log returns, saved to disk.
    fig = plt.figure(figsize=(12, 8))
    plt.title('qq plot of the log(return)')
    qqplot(stock_data['log_ret'], line='q', ax=plt.gca(), fit=True)
    plt.savefig('logrec_qq.png')

    train_arr = get_pred_arr(stock_data)
    test_arr = get_pred_arr(test_data)

    # Fit the scaler on the training array only, then apply it to both arrays.
    scaler = preprocessing.StandardScaler().fit(train_arr)
    sctrain_arr = scaler.transform(train_arr)
    sctest_arr = scaler.transform(test_arr)

    # NOTE(review): ``sklearn.cross_validation`` is presumably unavailable in
    # current scikit-learn releases (``sklearn.model_selection``) - confirm.
    from sklearn.cross_validation import train_test_split
    stock_train, stock_cv, true_train, true_cv = train_test_split(
        sctrain_arr,
        stock_data['log_ret'].fillna(method='backfill').values,
        test_size=0.33,
        random_state=42)
Exemplo n.º 30
0
# * Does our model obey the theory?

# Durbin-Watson statistic of the residuals (value not stored).
sm.stats.durbin_watson(arma_mod30.resid.values)

# Residuals over time.
fig = plt.figure(figsize=(12, 8))
ax = fig.add_subplot(111)
ax = arma_mod30.resid.plot(ax=ax)

resid = arma_mod30.resid

stats.normaltest(resid)

# Q-Q plot of the residuals.
fig = plt.figure(figsize=(12, 8))
ax = fig.add_subplot(111)
fig = qqplot(resid, line="q", ax=ax, fit=True)

# ACF and PACF of the residuals.
fig = plt.figure(figsize=(12, 8))
ax1 = fig.add_subplot(211)
fig = sm.graphics.tsa.plot_acf(resid.values.squeeze(), lags=40, ax=ax1)
ax2 = fig.add_subplot(212)
fig = sm.graphics.tsa.plot_pacf(resid, lags=40, ax=ax2)

r, q, p = sm.tsa.acf(resid.values.squeeze(), fft=True, qstat=True)
# Size the lag column from the result instead of hard-coding 24 rows, so the
# table also builds for series of a different length.
data = np.c_[np.arange(1, len(q) + 1), r[1:], q, p]

table = pd.DataFrame(data, columns=["lag", "AC", "Q", "Prob(>Q)"])
print(table.set_index("lag"))

# * This indicates a lack of fit.
Exemplo n.º 31
0
# Load in the image for Subject 1.
img = nib.load(pathtodata + "BOLD/task001_run001/bold.nii.gz")
# ``get_data()`` was removed from nibabel; reading ``dataobj`` through
# ``np.asanyarray`` is the documented way to get the same array.
data = np.asanyarray(img.dataobj)
data = data[..., 6:]  # Knock off the first 6 observations.

# Pull out a single voxel.
voxel = data[41, 47, 2]
plt.plot(voxel)
plt.close()
# Sort of a curve = nonconstant mean.
# Variance also seems to be funky toward the ends.
plt.hist(voxel)
plt.close()
# Long right tail.
qqplot(voxel, line='q')
plt.close()
# More-or-less normal, with deviations at tails.

# Box-Cox method to find best power transformation.
bc = stats.boxcox(voxel)
bc[1]  # Lambda pretty close to 0, so try log transformation.
print("Log transforming data.")

# Log transform the data.
lvoxel = np.log(voxel)
plt.plot(lvoxel)
plt.close()
plt.hist(lvoxel)
plt.close()
qqplot(lvoxel, line='q')
def plot_qqplot(arma_mod):
  """Show a Q-Q plot of ``arma_mod.resid`` on a new 12x8 figure.

  ``arma_mod`` only needs a ``resid`` attribute accepted by ``qqplot``.
  The call blocks in ``plt.show()`` and returns ``None``.
  """
  axes = plt.figure(figsize=(12, 8)).add_subplot(111)
  qqplot(arma_mod.resid, line='q', ax=axes, fit=True)
  plt.show()
Exemplo n.º 33
0
# Load in the image for Subject 1.
img = nib.load(pathtodata + "BOLD/task001_run001/bold.nii.gz")
# get_fdata() replaces get_data(), which nibabel deprecated in 3.0 and
# removed in 5.0; it returns the image data as a floating-point array.
data = img.get_fdata()
data = data[..., 6:]  # Knock off the first 6 observations.

# Pull out a single voxel.
voxel = data[41, 47, 2]
# Each figure below is closed right after it is drawn, so nothing is left
# open between plots.
plt.plot(voxel)
plt.close()
# Sort of a curve = nonconstant mean.
# Variance also seems to be funky toward the ends.
plt.hist(voxel)
plt.close()
# Long right tail.
qqplot(voxel, line="q")
plt.close()
# More-or-less normal, with deviations at tails.

# Box-Cox method to find best power transformation.
bc = stats.boxcox(voxel)
bc[1]  # Lambda pretty close to 0, so try log transformation.
print("Log transforming data.")

# Log transform the data.
lvoxel = np.log(voxel)
plt.plot(lvoxel)
plt.close()
plt.hist(lvoxel)
plt.close()
qqplot(lvoxel, line="q")
Exemplo n.º 34
0
def qqplot(resid):
    """Draw a Q-Q plot of ``resid`` on a new 12x8 figure and show it.

    This function has the same name as the statsmodels helper it wraps, so
    the helper is imported locally under an alias to keep the two apart.
    Returns ``None``; the figure is displayed through ``pylab.show()``.
    """
    from statsmodels.graphics.api import qqplot as sm_qqplot

    axes = plt.figure(figsize=(12, 8)).add_subplot(111)
    sm_qqplot(resid, line='q', ax=axes, fit=True)
    pylab.show()
Exemplo n.º 35
0
def predict_arma(number, index, data, original_index, original_data):
    """Fit ARMA models to a differenced series and plot a dynamic forecast.

    Both series are re-indexed by timestamps built from 1999-03-01 plus
    ``index * 5`` / ``original_index * 5`` minutes and then differenced with
    a lag of one. A 3x3 grid of ARMA(p, q) models is fitted to ``data``; the
    order with the lowest AIC is used for residual diagnostics (ACF/PACF,
    Durbin-Watson, Q-Q plot, Ljung-Box table) and for a dynamic prediction
    running from the last timestamp of ``data`` to the last timestamp of
    ``original_data``.

    Args:
        number: Label interpolated into the title of the first figure.
        index: Time offsets of ``data`` in 5-minute steps. Must support
            ``index * 5`` and ``len()`` (presumably a NumPy array; confirm
            against the caller).
        data: Values of the series the models are fitted to.
        original_index: Time offsets of ``original_data`` in 5-minute steps.
        original_data: Values of the reference series plotted next to the
            forecast.

    Returns:
        None. All results are printed or drawn on matplotlib figures.
    """
    # Height factor of the first figure (figsize is 10 x 4 * order_num).
    order_num = 5

    # Build date-typed indexes for both series.
    df = pd.DataFrame({
        'year': 1999,
        'month': 3,
        'day': 1,
        'minute': index * 5
    })
    original_df = pd.DataFrame({
        'year': 1999,
        'month': 3,
        'day': 1,
        'minute': original_index * 5
    })
    timestamps = pd.to_datetime(df)
    original_timestamps = pd.to_datetime(original_df)
    data = pd.Series(data, index=timestamps)
    original_data = pd.Series(original_data, index=original_timestamps)

    fig = plt.figure("Differences of Diverse Orders in Day %s" % number,
                     figsize=(10, 4 * order_num))

    # The original author settled on a difference with lag 1 after inspecting
    # the differenced series.
    order = 1
    data = data.diff(order)
    original_data = original_data.diff(order)
    print(data)
    print(original_data)
    # diff() leaves NaN in the leading `order` positions, which makes the
    # autocorrelation figure come out wrong; zero them by position.
    data.iloc[:order] = 0.0
    original_data.iloc[:order] = 0.0

    ax1 = fig.add_subplot(211)
    fig = sm.graphics.tsa.plot_acf(data, lags=40, ax=ax1)
    ax2 = fig.add_subplot(212)
    fig = sm.graphics.tsa.plot_pacf(data, lags=40, ax=ax2)

    # Fit every ARMA(p, q) with p < row and q < col; arma_mod[p][q] holds the
    # fitted result. (The original author noted ARMA(0, 2) as the best fit
    # for their data.)
    row = 3
    col = 3
    arma_mod = [[sm.tsa.ARMA(data, (i, j)).fit() for j in range(col)]
                for i in range(row)]

    for fitted_row in arma_mod:
        for fitted in fitted_row:
            print(fitted.aic, fitted.bic, fitted.hqic)

    # Pick the order with the minimal AIC. The search is bounded to the grid
    # fitted above so the selected (p, q) is always a valid index into
    # arma_mod (the library default for max_ar exceeds the grid).
    # NOTE(review): the grid is fitted with ARMA's default trend while the
    # selection uses trend='nc' - confirm this mismatch is intended.
    res = sm.tsa.arma_order_select_ic(data,
                                      max_ar=row - 1,
                                      max_ma=col - 1,
                                      ic=['aic', 'bic'],
                                      trend='nc')
    best_p, best_q = res.aic_min_order
    predict_model = arma_mod[best_p][best_q]

    # Autocorrelation of the residuals of the selected model.
    resid = predict_model.resid
    fig = plt.figure("Autocorrelation of residuals", figsize=(12, 8))
    ax1 = fig.add_subplot(211)
    fig = sm.graphics.tsa.plot_acf(resid.values.squeeze(), lags=40, ax=ax1)
    ax2 = fig.add_subplot(212)
    fig = sm.graphics.tsa.plot_pacf(resid, lags=40, ax=ax2)

    # Durbin-Watson test.
    # DW is in [0, 4], where
    # DW = 4 <=> rho = -1, DW = 2 <=> rho = 0, DW = 0 <=> rho = 1
    print(sm.stats.durbin_watson(resid.values))

    # Q-Q plot of the residuals.
    fig = plt.figure("Check for the data validation", figsize=(12, 8))
    ax = fig.add_subplot(111)
    fig = qqplot(resid, line='q', ax=ax, fit=True)

    # Ljung-Box test. The lag column is sized from the ACF that was actually
    # returned (r[0] is lag 0) rather than assuming exactly 40 lags.
    r, q, p = sm.tsa.acf(resid.values.squeeze(), qstat=True)
    lb_data = np.c_[np.arange(1, len(r)), r[1:], q, p]
    table = pd.DataFrame(lb_data, columns=['lag', "AC", "Q", "Prob(>Q)"])
    print(table.set_index('lag'))

    # Dynamic prediction from the end of the fitted series to the end of the
    # original series.
    begin_time = str(timestamps[len(index) - 1])
    end_time = str(original_timestamps[len(original_index) - 1])
    forecast = predict_model.predict(begin_time, end_time, dynamic=True)
    print(forecast)
    fig, ax = plt.subplots(figsize=(10, 3))
    # .loc replaces the removed .ix indexer; label slicing is unchanged.
    ax = original_data.loc['1999-03-01 00:00:00':].plot(ax=ax)
    forecast.plot(ax=ax)
Exemplo n.º 36
0
# Compute the information criteria of the ARMA(1, 1) model.
arma_mod = sm.tsa.ARMA(time_series, (1, 1)).fit()
print('AIC:', arma_mod.aic, 'BIC:', arma_mod.bic, 'HQIC:', arma_mod.hqic)
# Diagnostics of the ARMA fit: ACF (top) and PACF (bottom) of the residuals.
resid = list(arma_mod.resid)
fig = plt.figure(figsize=(12, 8))
ax1 = fig.add_subplot(211)
fig = sm.graphics.tsa.plot_acf(resid, lags=40, ax=ax1)
ax2 = fig.add_subplot(212)
fig = sm.graphics.tsa.plot_pacf(resid, lags=40, ax=ax2)
plt.show()
# Durbin-Watson statistic of the residuals.
print('Durbin-Watson:', sm.stats.durbin_watson(arma_mod.resid))
# Q-Q plot of the residuals.
fig = plt.figure(figsize=(12, 8))
ax = fig.add_subplot(111)
fig = qqplot(np.array(resid), line='q', ax=ax, fit=True)
plt.show()

# New predicted values; these correspond to the first-differenced series.
predict = arma_mod.predict(N_history - 1, N_history + 3, dynamic=True)
print(predict)
# Undo the differencing: accumulate the predicted differences and add the
# last observed value. Comment out the next two lines when the series was
# not differenced.
predict = predict.cumsum()
predict = time_series_ori[-1] + predict
print(predict)
time_series_ori_predict = np.r_[time_series_ori, predict]
plt.plot(range(len(time_series_ori)), time_series_ori, 'b')
# The x-range follows the number of predicted points, so changing the
# forecast horizon above cannot desynchronise the x and y lengths.
plt.plot(range(len(time_series_ori), len(time_series_ori) + len(predict)),
         predict, 'r')
plt.show()
    plt.subplots_adjust(hspace=0.4)
    ax1 = fig.add_subplot(211)
    plt.xlabel('Lag (Business Days)')
    plt.ylabel('Correlation')
    plt.title('Residual Volume Autocorrelation')
    sm.graphics.tsa.plot_acf(stock_data['ARMAResid'].values.squeeze(), lags=60, ax=ax1)
    ax2 = fig.add_subplot(212)
    plt.title('Residual Volume  Pacf')
    plt.xlabel('Lag (Business Days)')
    plt.ylabel('Correlation')
    sm.graphics.tsa.plot_pacf(stock_data['ARMAResid'], lags=60, ax=ax2)
    plt.savefig('volume_ARMAresidcorr.png')
    
    fig = plt.figure(figsize=(12,8))
    plt.title('qq plot of the ARMA Volume Residual')
    qqplot(stock_data['ARMAResid'], line='q', ax=plt.gca(), fit=True)
    plt.savefig('volARMAresid_qq.png')

    error_figure(stock_data, 'ARMAResid', 'ARMAPredictVolume', 
                 'ARMANormResids','ARMA Only')
    plt.savefig('ARMA_stock_residual_panel.png')

    error_figure(test_data, 'ARMAResid', 'ARMAPredictVolume', 
                 'ARMANormResids','ARMA Only')
    plt.savefig('ARMA_test_residual_panel.png')
    plt.close('all')
    
    train_arr = get_pred_arr(stock_data)
    test_arr =get_pred_arr(test_data)     

    scaler = preprocessing.StandardScaler().fit(train_arr)
# Fit ARMA models

# In[39]:

arma_mod20 = sm.tsa.ARMA(dta, (2, 0)).fit(disp=False)
print(arma_mod20.params)  # ARMA: autoregressive moving average

# In[40]:

arma_mod30 = sm.tsa.ARMA(dta, (3, 0)).fit(disp=False)
# resid holds the residuals of the ARMA(3, 0) fit.
resid = arma_mod30.resid
# Normality test of the residuals; the result is not stored or printed.
stats.normaltest(resid)
fig = plt.figure(figsize=(12, 8))
ax = fig.add_subplot(111)
# qqplot draws the Q-Q plot of the residuals.
fig = qqplot(resid, line='q', ax=ax, fit=True)

# Let's then do some predictions

# In[41]:

predict_sunspots = arma_mod30.predict('1990', '2012', dynamic=True)
print(predict_sunspots)
fig, ax = plt.subplots(figsize=(12, 8))
# .loc replaces the removed .ix indexer; label slicing from '1950' onward is
# unchanged.
ax = dta.loc['1950':].plot(ax=ax)
fig = arma_mod30.plot_predict('1990',
                              '2012',
                              dynamic=True,
                              ax=ax,
Exemplo n.º 39
0
import tushare as ts
import pandas as pd
import statsmodels.api as sm
# Fetch k-line data for code '600858' between the two dates and model the
# per-row difference between the 'close' and 'open' columns.
data = ts.get_k_data('600858', start='2016-02-05',end='2016-06-05')
share_change=data['close']-data['open']
# Replace the index with a generated date range of 78 entries.
# NOTE(review): '78' is hard-coded; presumably it must equal len(share_change) - confirm.
share_change.index=pd.Index(sm.tsa.datetools.dates_from_range('1','78'))
from statsmodels.tsa.stattools import adfuller #Stationarity check for the series (unit-root test).
dftest = adfuller(share_change, autolag='AIC')
print(dftest[1])#Provided the ADF test p-value is < 0.01, the lower the order the better
from statsmodels.stats.diagnostic import acorr_ljungbox
# NOTE(review): the Ljung-Box null hypothesis is "no autocorrelation", so a
# small p-value is what would indicate correlation - confirm the threshold
# stated in the original comment on the next line.
p_value = acorr_ljungbox(share_change, lags=1)#Tests for autocorrelation; per the original author, P>0.5 indicates correlation
share_acf = sm.graphics.tsa.plot_acf(share_change,lags=40)# ACF plot
share_pacf = sm.graphics.tsa.plot_pacf(share_change,lags=40)#PACF plot
print(share_acf,share_pacf)
# Fit four candidate ARMA orders on the plain list of values and print
# AIC / BIC / HQIC for each.
share_change1=list(share_change)
arma_mod1 = sm.tsa.ARMA(share_change1,(2,1)).fit()
print(arma_mod1.aic,arma_mod1.bic,arma_mod1.hqic)
arma_mod2 = sm.tsa.ARMA(share_change1,(2,2)).fit()
print(arma_mod2.aic,arma_mod2.bic,arma_mod2.hqic)
arma_mod3 = sm.tsa.ARMA(share_change1,(1,0)).fit()
print(arma_mod3.aic,arma_mod3.bic,arma_mod3.hqic)
arma_mod4 = sm.tsa.ARMA(share_change1,(0,1)).fit()
print(arma_mod4.aic,arma_mod4.bic,arma_mod4.hqic)
# Q-Q plot of the ARMA(0, 1) residuals.
# NOTE(review): residuals come from arma_mod4 but the prediction below uses
# arma_mod2 - confirm this is intended.
resid = arma_mod4.resid
from statsmodels.graphics.api import qqplot
figqq = qqplot(resid)
print(figqq)
predict_ts = arma_mod2.predict(start=79,end=82)