Example #1
 def __analysis_index(self):
     index = self.get_env().query_data(Index_Data).get_data_serise()
     index_name = list(index.columns)
     index_name.remove(COM_DATE)
     index[index_name] = index[index_name].pct_change()/100
     index[index_name] = np.log(index[index_name]+1)
     index = index.set_index(COM_DATE)
     index.index = pd.to_datetime(index.index)
     res = pd.DataFrame(columns = ['mean','std','skew','kurt','jarque-Bera','adf','lm'])
     for index_name_ in index_name:
         fig, ax = plt.subplots()
         ax.plot(index[index_name_].dropna(), label=index_name_)
         ax.set_xlabel('Time')
         ax.set_ylabel('Log return')
         ax.set_title(index_name_ + ' returns')
         ax.legend()
         plt.savefig(os.path.join(RESULTS, index_name_+'.png'))
         plt.close()
         fig, ax = plt.subplots()
         ax.hist(index[index_name_].dropna(),bins =25)
         ax.set_xlabel('Log return')
         ax.set_ylabel('Frequency')
         ax.set_title(index_name_ + ' return histogram')
         plt.savefig(os.path.join(RESULTS, index_name_+'bar.png'))
         plt.close()
         res.loc[index_name_] = [
             np.nanmean(index[index_name_].dropna()),
             np.nanstd(index[index_name_].dropna()),
             index[index_name_].dropna().skew(),
             index[index_name_].dropna().kurt(),
             stats.jarque_bera(index[index_name_].dropna())[0],
             adfuller(index[index_name_].dropna())[4]['5%'],
             q_stat(acf(index[index_name_].dropna())[1:13],len(index[index_name_].dropna()))[1][-1]
         ]
     res.to_csv(os.path.join(RESULTS,'index_info.csv'))
Example #2
    def plot_correlogram(self, lags=10, title=None):
        # NOTE: without passing residuals, this method cannot be used by the optimal brute-force finder

        def moving_average(a: np.ndarray, n: int = 3):  # nested helper: no `self` parameter
            ret = np.cumsum(a)
            ret[n:] = ret[n:] - ret[:-n]
            return ret[n - 1:] / n

        matplotlib.use(
            'TkAgg'
        )  # NOTE: necessary due to inheritance from TimeSeries, which uses 'Agg'
        x = self.data
        lags = min(10, int(len(x) / 5)) if lags is None else lags
        fig, axes = plt.subplots(nrows=2, ncols=2, figsize=(14, 8))
        axes[0][0].plot(x.values)  # Residuals
        # axes[0][0].plot(moving_average(x, n=21), c='k', lw=1)  # moving average of residuals  # FIXME: compute moving average
        q_p = np.max(q_stat(acf(x, nlags=lags), len(x))[1])
        stats = f'Q-Stat: {np.max(q_p):>8.2f}\nADF: {adfuller(x)[1]:>11.2f}'
        axes[0][0].text(x=.02, y=.85, s=stats, transform=axes[0][0].transAxes)
        probplot(x, plot=axes[0][1])
        mean, var, skew, kurtosis = x.mean(), x.var(), x.skew(), x.kurt()  # moment() gives central moments (the first is always 0); assumes x is a pandas Series
        s = f'Mean: {mean:>12.2f}\nSD: {np.sqrt(var):>16.2f}\nSkew: {skew:12.2f}\nKurtosis:{kurtosis:9.2f}'
        axes[0][1].text(x=.02, y=.75, s=s, transform=axes[0][1].transAxes)
        plot_acf(x=x, lags=lags, zero=False, ax=axes[1][0])
        plot_pacf(x, lags=lags, zero=False, ax=axes[1][1])
        axes[1][0].set_xlabel('Lag')
        axes[1][1].set_xlabel('Lag')
        fig.suptitle(title, fontsize=14)
        sns.despine()
        fig.tight_layout()
        fig.subplots_adjust(top=.9)
        fig1 = plt.gcf()
        print('plotting')
        plt.show()
Example #3
def WhiteNoiseTest(ret, nlags=20, isprintsummary=False):
    """
    White-noise test using the Ljung-Box statistic.
    If the null hypothesis is accepted, the series is white noise (purely random) and the function returns False.
    If the null hypothesis is rejected, the series is not random and the function returns True.
    (A short usage sketch follows the function.)
    """
    acf = stattools.acf(ret, nlags=nlags, qstat=False,
                        fft=True, alpha=None, missing="drop")
    n = len(ret)
    results = stattools.q_stat(acf[1:], n)  # drop the lag-0 autocorrelation (always 1)
    if isprintsummary:
        print("Show Ljung-Box Q-statistics: ", results[0])
        print("Show corresponding P-values: ", results[1])
    if np.any(results[1] < PVal):  # a single significant Q statistic is enough to reject H0
        if isprintsummary:
            print("- - - - - - - - - - - - -  - - - - - - - - - - - - -")
            print("Some LB stats p < %.2f -> correlated time series!" % PVal)
            print("- - - - - - - - - - - - -  - - - - - - - - - - - - -")
        return True
    else:  # All p-values > PVal, accept H0 (white noise time series)
        if isprintsummary:
            print("- - - - - - - - - - - - -  - - - - - - - - - - - - -")
            print("All LB stats p > %.2f -> white noise time series!" % PVal)
            print("- - - - - - - - - - - - -  - - - - - - - - - - - - -")
        return False
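
# --- usage sketch (not part of the original example) -------------------------
# Assumptions: the module already does `from statsmodels.tsa import stattools`
# and defines the significance level PVal; both are restated here so the sketch
# is self-contained.
import numpy as np
from statsmodels.tsa import stattools

PVal = 0.05
rng = np.random.default_rng(0)
print(WhiteNoiseTest(rng.standard_normal(500)))           # white noise: usually False
print(WhiteNoiseTest(np.cumsum(rng.standard_normal(500)),
                     isprintsummary=True))                # random walk: True (strong autocorrelation)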
Example #4
def plot_correlogram(x, fig, axes, lags=40, title=None):
    lags = min(10, int(len(x) / 5)) if lags is None else lags
    x.plot(ax=axes[0][0])
    q_p = np.max(q_stat(acf(x, nlags=lags), len(x))[1])
    stats = f'Q-Stat: {np.max(q_p):>8.2f}\nADF: {adfuller(x)[1]:>11.2f} \nHurst: {round(hurst(x.values),2)}'
    axes[0][0].text(x=.02, y=.85, s=stats, transform=axes[0][0].transAxes)
    probplot(x, plot=axes[0][1])
    mean, var, skew, kurtosis = x.mean(), x.var(), x.skew(), x.kurt()  # moment() gives central moments (the first is always 0)
    s = f'Mean: {mean:>12.2f}\nSD: {np.sqrt(var):>16.2f}\nSkew: {skew:12.2f}\nKurtosis:{kurtosis:9.2f}'
    axes[0][1].text(x=.02, y=.75, s=s, transform=axes[0][1].transAxes)
    plot_acf(x=x, lags=lags, zero=False, ax=axes[1][0])
    plot_pacf(x=x, lags=lags, zero=False, ax=axes[1][1])
    axes[1][0].set_xlabel('Lag')
    axes[1][1].set_xlabel('Lag')
    fig.suptitle(title, fontsize=20)
    fig.tight_layout()
    fig.subplots_adjust(top=.9)
Example #5
def plot_correlogram(x, lags=None, title=None):    
    lags = min(10, int(len(x)/5)) if lags is None else lags
    fig, axes = plt.subplots(nrows=2, ncols=2, figsize=(14, 8))
    x.plot(ax=axes[0][0], title='Time Series')
    x.rolling(21).mean().plot(ax=axes[0][0], c='k', lw=1)
    q_p = np.max(q_stat(acf(x, nlags=lags), len(x))[1])
    stats = f'Q-Stat: {np.max(q_p):>8.2f}\nADF: {adfuller(x)[1]:>11.2f}'
    axes[0][0].text(x=.02, y=.85, s=stats, transform=axes[0][0].transAxes)
    probplot(x, plot=axes[0][1])
    mean, var, skew, kurtosis = x.mean(), x.var(), x.skew(), x.kurt()  # moment() gives central moments (the first is always 0)
    s = f'Mean: {mean:>12.2f}\nSD: {np.sqrt(var):>16.2f}\nSkew: {skew:12.2f}\nKurtosis:{kurtosis:9.2f}'
    axes[0][1].text(x=.02, y=.75, s=s, transform=axes[0][1].transAxes)
    plot_acf(x=x, lags=lags, zero=False, ax=axes[1][0])
    plot_pacf(x, lags=lags, zero=False, ax=axes[1][1])
    axes[1][0].set_xlabel('Lag')
    axes[1][1].set_xlabel('Lag')
    fig.suptitle(title, fontsize=14)
    sns.despine()
    fig.tight_layout()
    fig.subplots_adjust(top=.9)
Example #6
def many_paras():
    """
    The p-value controls the false positive rate (FPR);
    the q-value controls the false discovery rate (FDR).
    Q-statistic (Ljung-Box): Q_LB = T*(T+2) * sum_{j=1..p} r_j^2 / (T-j),
        where r_j is the lag-j autocorrelation of the residuals, T the number of observations, and p the number of lags.
    FDR = E(V/R), where V is the number of false discoveries and R the total number of rejections.
    acf: autocorrelation coefficient -- y(t) = a0 + a1*y(t-1) + epsilon
         rho(h) = sum_{i=1..n-h} (x(i)-mu)*(x(i+h)-mu) / sum_{i=1..n} (x(i)-mu)^2
    pacf: partial autocorrelation; the k-1 intermediate lags are treated as known and only lag k is estimated --
          y(t) = a0 + a1*y(t-1) + ... + ak*y(t-k) + epsilon
    ARMA(p,q): AR is an autoregressive process of order p, MA a moving-average process of order q.
    ARIMA adds a differencing step on top of the ARMA model.
    ADF: first-order autoregression with a white-noise disturbance, y(t) = mu + phi*y(t-1) + epsilon;
         tested via the unit root -- if a unit root exists, the series is non-stationary.
         For order p, the sum of the p roots must be less than 1.
    sma: simple moving average
    wma: weighted moving average
    ema: exponential moving average
    ewma: exponentially weighted moving average
    OBV: On Balance Volume; net bull/bear ratio = [(Close - Low) - (High - Close)] / (High - Low) * Volume
    :return:
    """
    # 1. Compute autocorrelation coefficients
    acfs = stattools.acf(SHRet)
    # Plot the ACF
    plot_acf(SHRet, use_vlines=True, lags=30)
    # 2. Compute partial autocorrelation coefficients
    pacfs = stattools.pacf(SHRet)
    plot_pacf(SHRet, use_vlines=True, lags=30)
    # 3. Run the ADF unit-root test and inspect the result
    adfSHRet = ADF(SHRet)
    print(adfSHRet.summary().as_text())
    # 4. Ljung-Box Q statistics
    LjungBox1 = stattools.q_stat(stattools.acf(SHRet)[1:13], len(SHRet))
    print(LjungBox1)
    # 5. lag is the m in the test statistic above; here the autocorrelations up to order 12 are tested.
    LjungBox = stattools.q_stat(stattools.acf(CPItrain)[1:12], len(CPItrain))
    # order gives the model orders; (1, 0, 1) builds an ARMA(1, 1) model;
    # the middle 0 means the raw, undifferenced series is used (0 differences).
    model1 = arima_model.ARIMA(CPItrain, order=(1, 0, 1)).fit()
    model1.summary()
    model1.conf_int()
    # 6. Diagnostic plots for the fitted time-series model
    stdresid = model1.resid / math.sqrt(model1.sigma2)
    plt.plot(stdresid)
    plot_acf(stdresid, lags=20)
    LjungBox = stattools.q_stat(stattools.acf(stdresid)[1:13], len(stdresid))
    print(LjungBox[1][-1])
    print(model1.forecast(3)[0])
    # 7. Autoregressive conditional heteroskedasticity (ARCH) model
    # y(t) = b*x(t) + epsilon(t)
    # epsilon(t)^2 = a0 + a1*epsilon(t-1)^2 + a2*epsilon(t-2)^2 + n(t)
    # sigma_t^2 = omega + sum_{i=1..p} alpha_i * epsilon_{t-i}^2
    # n(t) i.i.d. with zero mean and var(n^2) = r^2
    am = arch_model(SHret)
    model = am.fit(update_freq=0)
    print(model.summary())
    # 8. Pairs handling (pair-trading spreads)
    pt = TradeTool()
    SSD = pt.SSD(priceAf, priceBf)
    SSDspread = pt.SSDSpread(priceAf, priceBf)
    SSDspread.describe()
    coefficients = pt.cointegration(priceAf, priceBf)
    CoSpreadF = pt.CointegrationSpread(priceA, priceB, formPeriod, formPeriod)
    CoSpreadTr = pt.CointegrationSpread(priceA, priceB, formPeriod,
                                        tradePeriod)
    CoSpreadTr.describe()
    bound = pt.calBound(priceA, priceB, 'Cointegration', formPeriod, width=1.2)
    # 9. Pair trading: choosing entry points
    trtl = TradeTool()
    account = trtl.TradeSimPair(PAt, PBt, position)

    # 10. momentum function
    et = ElementTool()
    et.momentum(Close, 5).tail(n=5)
    momen35 = et.momentum(Close, 35)
    signal = []
    for i in momen35:
        if i > 0:
            signal.append(1)
        else:
            signal.append(-1)
    signal = pd.Series(signal, index=momen35.index)
    signal.head()
    tradeSig = signal.shift(1)
    ret = Close / Close.shift(1) - 1
    # ret=ret['2014-02-20':]
    # ret.head(n=3)
    Mom35Ret = ret * (signal.shift(1))
    Mom35Ret[0:5]
    real_Mom35Ret = Mom35Ret[Mom35Ret != 0]
    real_ret = ret[ret != 0]

    Rsi12 = et.rsi(BOCMclp, 12)
    # Strategy
    rsi6 = et.rsi(BOCMclp, 6)
    rsi24 = et.rsi(BOCMclp, 24)
    # Use rsi6 to capture buy/sell points
    Sig1 = []
    for i in rsi6:
        if i > 80:
            Sig1.append(-1)
        elif i < 20:
            Sig1.append(1)
        else:
            Sig1.append(0)

    date1 = rsi6.index
    Signal1 = pd.Series(Sig1, index=date1)
    Signal1[Signal1 == 1].head(n=3)
    Signal1[Signal1 == -1].head(n=3)

    Signal2 = pd.Series(0, index=rsi24.index)
    lagrsi6 = rsi6.shift(1)
    lagrsi24 = rsi24.shift(1)
    for i in rsi24.index:
        if (rsi6[i] > rsi24[i]) & (lagrsi6[i] < lagrsi24[i]):
            Signal2[i] = 1
        elif (rsi6[i] < rsi24[i]) & (lagrsi6[i] > lagrsi24[i]):
            Signal2[i] = -1

    signal = Signal1 + Signal2
    signal[signal >= 1] = 1
    signal[signal <= -1] = -1
    signal = signal.dropna()
    tradSig = signal.shift(1)

    tt = TradeTool()
    BuyOnly = tt.strategy_analy(buy, ret)
    SellOnly = tt.strategy_analy(sell, ret)
    Trade = tt.strategy_analy(tradSig, ret)
    Test = pd.DataFrame({
        "BuyOnly": BuyOnly,
        "SellOnly": SellOnly,
        "Trade": Trade
    })

    # Cumulative returns
    cumStock = np.cumprod(1 + ret) - 1
    cumTrade = np.cumprod(1 + tradeRet) - 1

    # 12. Simple moving average
    sma5 = et.smaCal(Close, 5)
    # 12. Weighted moving average
    wma5 = et.wmaCal(Close, w)
    # 12. Exponential moving average
    Ema = et.emaCal(Close, period)
    print(Ema)
    # 12. Exponentially weighted moving average
    Ewma = et.ewmaCal(Close, 5, 0.2)

    # 13. Bollinger Bands
    UnicomBBands = et.bbands(Close, 20, 2)
    print(UnicomBBands)
    multiplier = [1, 1.65, 1.96, 2, 2.58]
    price2010 = Close['2010-01-04':'2010-12-31']
    tt.CalBollRisk(price2010, multiplier)

    # 14. Performance
    btt = BackTestTool()
    Performance1 = btt.perform(Close, tradSignal1)
    print(Performance1)

    # 15. Trading and backtesting
    KDtrade = btt.trade(KDSignal, close)
    btt.backtest(KDtrade.Ret, KDtrade.KDtradeRet)

    # 16. Upward / downward breakouts
    KDupbreak = et.upbreak(KValue, DValue) * 1
    KDupbreak[KDupbreak == 1].head()

    KDdownbreak = et.downbreak(KValue, DValue) * 1
    KDdownbreak[KDdownbreak == 1].head()
    # Performance of the "golden cross" and "death cross" trading strategies
    btt.backtest(KDbreak.Ret, KDbreak.KDbreakRet)

    # 17. Volume indicators
    cumUpVol = et.VOblock(UpVol)
    cumDownVol = et.VOblock(DownVol)
    ALLVol = np.array([cumUpVol, cumDownVol]).transpose()

    # 18. Determine holding status
    hold = tt.judge_hold(trade)

    # 19. Single-trade simulation
    TradeAccount = tt.TradeSim(close, hold)
    print(TradeAccount)
Example #7
plot_acf(SHRet, use_vlines=True, lags=30)

plot_pacf(SHRet, use_vlines=True, lags=30)

SHclose = SHindex.Clsindex
SHclose.plot()
plt.title('Shanghai Composite closing index, 2014-2015')

SHRet.plot()
plt.title('Shanghai Composite returns, 2014-2015')

plot_acf(SHRet, use_vlines=True, lags=30)
plot_pacf(SHRet, use_vlines=True, lags=30)
plot_acf(SHclose, use_vlines=True, lags=30)

adfSHRet = ADF(SHRet)
print(adfSHRet.summary().as_text())

adfSHclose = ADF(SHclose)
print(adfSHclose.summary().as_text())

whiteNoise = np.random.standard_normal(size=500)
plt.plot(whiteNoise, c='b')
plt.title('White Noise')

LjungBox1 = stattools.q_stat(stattools.acf(SHRet)[1:13], len(SHRet))
print(LjungBox1)
print(LjungBox1[1][-1])
LjungBox2 = stattools.q_stat(stattools.acf(SHclose)[1:13], len(SHclose))
print(LjungBox2[1][-1])
Example #8
## Autocorrelation coefficients
acfs=stattools.acf(SH_ret)

## Partial autocorrelation coefficients
pacfs=stattools.pacf(SH_ret)

plot_acf(SH_ret,use_vlines=True,lags=30)

SH_ret.plot()
plt.title('return')

SH_close.plot()
plt.title('close price')


adfSH_ret=ADF(SH_ret)
print(adfSH_ret)

adfSH_close=ADF(SH_close)
print(adfSH_close)

LjungBox_ret=stattools.q_stat(acfs[1:],len(SH_ret))  # drop the lag-0 autocorrelation
LjungBox_ret[1][-1]

HS300_data['close']['2010-01-01':'2016-12-31']
HS300_data['2010-01-01':'2016-12-31']

model1=arima_model.ARIMA(SH_ret,order=(2,0,1)).fit()
model1.summary()

stattools.arma_order_select_ic(SH_ret,max_ma=4)
Example #9
                                 end)

returns = RJ['close_price'].pct_change().dropna()
returns.name = 'return'
# returns.plot()
# returns.column = ['return']
# print(returns)

# Compute autocorrelation coefficients
# acfs = stattools.acf(returns)
# # print(acfs)
# # Partial autocorrelation coefficients
# pacfs = stattools.pacf(returns)
# # print(pacfs)
# # ACF plot
# plot_acf(returns,use_vlines=True,lags=30)
# # PACF plot
# plot_pacf(returns,use_vlines=True,lags=30)
# plot_acf(RJ['close_price'],use_vlines=True,lags=30)
#
# plt.show()

# Unit-root test
# adfRJ= ADF(returns)
# print(adfRJ.summary().as_text())
# adfClose = ADF(RJ['close_price'])
# print(adfClose.summary().as_text())

# White-noise test
LB = stattools.q_stat(stattools.acf(returns)[1:], len(returns))  # drop the lag-0 autocorrelation
print(LB)
Example #10
@author: Liu
"""

import numpy as np
import pandas as pd
import pandas_datareader as web
import matplotlib.pyplot as plt
import datetime

start=datetime.datetime(2018,1,1)
end=datetime.datetime(2019,6,30)
nasdaq=web.DataReader('^IXIC','yahoo',start,end)
nasdaq=nasdaq.dropna()
nasdaq=pd.DataFrame(nasdaq['Adj Close'].values,index=pd.to_datetime(nasdaq.index),columns=['Price'])
nasdaq=(nasdaq-nasdaq.shift(1))/nasdaq.shift(1)
nasdaq=nasdaq.dropna()
nasdaq.columns=['Return']
nasdaq.tail(3)
traindata=nasdaq[:-3]

from arch.unitroot import ADF
result=ADF(traindata.Return,max_lags=10)
print(result.summary().as_text())

from statsmodels.tsa import stattools
LjungBox=stattools.q_stat(stattools.acf(traindata.Return)[1:12],len(traindata))
LjungBox[1][-1]

import statsmodels.graphics.tsaplots as ts
ts.plot_acf(traindata.Return,use_vlines=True, lags=30)
Example #11
print(adfReutn.summary().as_text())

# plot_acf(logReturn, lags=20)
# plot_pacf(logReturn, lags=20)

model1 = arima_model.ARIMA(logReturn.values, order=(2, 0, 0)).fit()
model2 = arima_model.ARIMA(logReturn.values, order=(0, 0, 2)).fit()

print("model1.aic=%f \t model2.aic=%f" % (model1.aic, model2.aic))
print(model2.sigma2)
strresid = model2.resid / math.sqrt(model2.sigma2)
# strresid.plot()
plt.subplot(211)
plt.plot(strresid)
# plot_acf(strresid , lags=20)
LjungBox = stattools.q_stat(stattools.acf(strresid)[1:13], len(strresid))
print("LjungBox:", LjungBox[1][-1], '\n', LjungBox)
# plt.subplot(212)

print(model2.forecast(10)[0])
print(type(model2.forecast(10)[0]))

test = pd.DataFrame(model2.forecast(10)[0])
test.index = logReturn1.index
test.columns = ['forecast']
test['return1'] = logReturn1.values
logReturn1.columns = ['return1']

print("test:\n", test)

# logReturn1['forecast'] = pd.Series( model2.forecast(10)[0])
Example #12
# Take logs to improve stationarity
inData = np.log(inData)  #––––––––––––––––––

# The data can be normalized before modeling to reduce the loss
#scaler = MinMaxScaler()
scaler = MinMaxScaler(feature_range=(0, 1))
inData = scaler.fit_transform(inData.reshape(-1, 1))
'''
# Check stationarity: the ADF statistic should be below -3.435298 and the p-value below 0.05
ADF = adfuller(inData.ravel(),1)
print("ADF:")
print(ADF)
'''

stattools.q_stat(stattools.acf(inData.ravel())[1:13], len(inData))[1][-1]
plot_acf(inData.ravel(), lags=30)
plot_pacf(inData.ravel(), lags=30)

# Select the ARMA(p, q) order
'''order = stattools.arma_order_select_ic(inData,max_ar=3,max_ma=3,ic=['aic','bic','hqic'])
print("(p,q):")
pq = order.bic_min_order
print(order.bic_min_order)#(p,q)'''

# Split the data into SITE_SIZE groups by site
site_names = []  # list of per-site data
site_cnames = []  # list of site names
for num in range(0, SITE_SIZE):
    site_cnames.append(df.at[num * DATA_SIZE, u'事发街道'])
    if num == 0:
Example #13
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
from statsmodels.tsa import stattools
from arch import arch_model

SHret = pd.read_table('TRD_IndexSum.txt', index_col='Trddt', sep='\t')
SHret.index = pd.to_datetime(SHret.index)
SHret = SHret.sort_index()
plt.subplot(211)
plt.plot(SHret**2)
plt.xticks([])
plt.title('Squared Daily Return of SH Index')
plt.subplot(212)
plt.plot(np.abs(SHret))
plt.title('Absolute Daily Return of SH Index')

LjungBox = stattools.q_stat(stattools.acf(SHret**2)[1:13], len(SHret))
print(LjungBox[1][-1])

am = arch_model(SHret)
model = am.fit(update_freq=0)
print(model.summary())
Example #14
close.plot()
plt.title('TAIEX closing index, 2014-2015')

taiexRet.plot()
plt.title('TAIEX returns, 2014-2015')

plot_acf(taiexRet,use_vlines=True,lags=30)
plot_pacf(taiexRet,use_vlines=True,lags=30)
plot_acf(close,use_vlines=True,lags=30)

adf_taiexRet=ADF(taiexRet)
print(adf_taiexRet.summary().as_text())
adfclose=ADF(close)
print(adfclose.summary().as_text())


# Generate a purely random (white-noise) series
whiteNoise=np.random.standard_normal(size=500)

# Plot the series
plt.plot(whiteNoise,c='b')
plt.title('White Noise')

LjungBox1=stattools.q_stat(stattools.acf(taiexRet)[1:13],len(taiexRet))
LjungBox1
LjungBox1[1][-1]

LjungBox2=stattools.q_stat(stattools.acf(close)[1:13],len(close))
LjungBox2[1][-1]

Example #15
from statsmodels.graphics.tsaplots import plot_pacf
plot_pacf(df["log_diff_12"].dropna(), method="ywmle", zero=False)

# Method : tsa.seasonal_decompose()
"""
Seasonal decomposition using moving averages.
"""
help(tsa.seasonal_decompose)
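
# A minimal sketch (not in the original): decompose df["value"] assuming a
# monthly seasonal period of 12; adjust `period` to the data's frequency.
decomp = tsa.seasonal_decompose(df["value"], model="additive", period=12)
decomp.plot()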

# Method : statsmodels.tsa.stattools.q_stat()
"""
Compute Ljung-Box Q Statistic.
Returns q-statistics, p-value
"""
from statsmodels.tsa.stattools import acf, q_stat
q_stat(acf(df["log_diff_12"].dropna())[1:],
       nobs=len(df["log_diff_12"].dropna()))  # q_stat expects autocorrelation coefficients, not the raw series

# Class : statsmodels.api.tsa.arima.ARIMA()
"""
ARIMA model
"""
m = statsmodels.api.tsa.arima.ARIMA(endog=df["value"],
                                    order=(1, 1, 0),
                                    exog=df["time"])
res = m.fit()
print(res.summary())

# Method : statsmodels.api.qqplot()
"""
Q-Q plot of the quantiles of x versus the quantiles/ppf of a distribution.
"""
Example #16
from statsmodels.tsa import stattools
import matplotlib.pyplot as plt

import numpy as np
from arch import arch_model

indexRet = pd.read_csv('index.csv', sep='\t')
indexRet.index = pd.to_datetime(indexRet.Date)
indexRet.head()
np.unique(indexRet.CoName)

taiexRet = indexRet.loc[indexRet.CoName == 'TSE Taiex    '].ROI
taiexRet.head()
taiexRet.tail()
taiexRet = taiexRet.astype(float).dropna()  # np.float was removed from NumPy; use the builtin float
# Plot the squared return series
plt.subplot(211)
plt.plot(taiexRet**2)
plt.xticks([])
plt.title('Squared Daily Return of taiex')

plt.subplot(212)
plt.plot(np.abs(taiexRet))
plt.title('Absolute Daily Return of taiex')

LjungBox = stattools.q_stat(stattools.acf(taiexRet**2)[1:13], len(taiexRet))
LjungBox[1][-1]

am = arch_model(taiexRet)
model = am.fit(update_freq=0)
print(model.summary())
Example #17
    for code in codes:
        if ar>lines*columns:
            pdf.savefig(fig)
            fig = plt.figure()
            ar=1
        fig.add_subplot(lines, columns, ar)
        closevalues = pd.Series(np.array(feed.getDataSeries(instrument=code).getCloseDataSeries()))
        tsaplots.plot_acf(np.log(closevalues).diff().dropna(), alpha=0.05, lags=40, ax=fig.gca(), title=code)
        ar += 1
    pdf.savefig(fig)
    plt.close('all')

#################################################################################################
#Trying to fit an ARMA model
#################################################################################################
# stattools.q_stat(...)  # placeholder: needs (acf_values, nobs)
# stattools.acf(...)     # placeholder: needs the series (and optionally nlags)

closevalues = pd.Series(np.array(feed.getDataSeries(instrument=codes[1]).getCloseDataSeries()))
# arma = smapi.tsa.ARMA(np.array(np.log(closevalues).diff().dropna()), (0, 5)).fit(maxiter=10000)
arma = smapi.tsa.ARMA(np.array(closevalues), (0, 5)).fit(maxiter=10000)
pvalue = diagnostic.acorr_ljungbox(arma.resid, lags=[20])[1]
print(pvalue)

results = {}
for code in codes:
    final_arma = None
    final_aic = 1e200
    final_order = None
    for ar in range(1, 5):
        for ma in range(1, 5):
Example #18
start = datetime.datetime(2018, 1, 1)
end = datetime.datetime(2019, 6, 30)
nasdaq = web.DataReader('^IXIC', 'yahoo', start, end)
nasdaq = nasdaq.dropna()
nasdaq = pd.DataFrame(nasdaq['Adj Close'].values,
                      index=pd.to_datetime(nasdaq.index),
                      columns=['Price'])
nasdaq = (nasdaq - nasdaq.shift(1)) / nasdaq.shift(1)
nasdaq = nasdaq.dropna()
nasdaq.columns = ['Return']
traindata = nasdaq

plt.subplot(211)
plt.plot(traindata**2)
plt.xticks([])

plt.subplot(212)
plt.plot(np.abs(traindata))

from statsmodels.tsa import stattools

LjungBox = stattools.q_stat(stattools.acf(traindata**2)[1:13], len(traindata))
LjungBox[1][-1]

from arch import arch_model

am = arch_model(traindata)
model = am.fit(update_freq=0)
print(model.summary())
Example #19
import pandas as pd
import matplotlib.pyplot as plt

#5.
CRSPday = pd.read_csv('Data/Part4/002/CRSPday.csv')
ibm = CRSPday.ibm
ibm.plot()

from statsmodels.graphics.tsaplots import *
plot_acf(ibm, lags=20)

from statsmodels.tsa import stattools
LjungBox = stattools.q_stat(stattools.acf(ibm)[1:13], len(ibm))
LjungBox[1][-1]

#6.
ge = CRSPday.iloc[:, 3]
ge.plot()

plot_acf(ge, lags=20)

LjungBox = stattools.q_stat(stattools.acf(ge)[1:2], len(ge))
LjungBox[1][-1]

LjungBox = stattools.q_stat(stattools.acf(ge)[1:9], len(ge))
LjungBox[1][-1]

#7.
SP500 = pd.read_csv('Data/Part4/002/SP500.csv')
r500 = SP500.r500
r500.plot()
Example #20
    fig = plot_acf(SHClose, use_vlines=True, lags=30)
    fig.savefig("SHRet_acf.png")

    # Unit-root test
    adfSHRet = ADF(SHRet)
    print(adfSHRet.summary().as_text())
    adfSHClose = ADF(SHClose)
    print(adfSHClose.summary().as_text())

    # White noise
    whiteNoise = np.random.standard_normal(size=500)
    fig = plt.figure()
    plt.plot(whiteNoise, c="b")
    fig.savefig("whiteNoise.png")
    # White-noise test for the Shanghai Composite index
    LB1 = stattools.q_stat(stattools.acf(SHRet)[1:13], len(SHRet))
    print(LB1)
    print(LB1[1][-1])
    LB2 = stattools.q_stat(stattools.acf(SHClose)[1:13], len(SHClose))
    print(LB2[1][-1])

    # ARMA modeling
    cpi = pd.read_csv("CPI.csv", index_col="time")
    cpi.index = pd.to_datetime(cpi.index)
    print(cpi.head())
    print(cpi.shape)
    # Training set
    CPITrain = cpi[3:]
    # Plot the time series
    fig = plt.figure()
    plt.plot(cpi)
Example #21
adfshindex = ADF(shindex)
print(adfshindex.summary().as_text())

whitenoise = np.random.standard_normal(500)
plt.plot(whitenoise,c = 'b')

cpi = ts.get_cpi()
cpi.index = pd.to_datetime(cpi['month'])
cpi = cpi['cpi']
cpitrain = cpi['2016-01-01':'2000-01-01']
cpitrain.plot()
# Is the series stationary?
print(ADF(cpitrain,max_lags=10).summary().as_text())
# Is it white noise?
ljb0 = stattools.q_stat(stattools.acf(cpitrain)[1:12],len(cpitrain))
ljb0[1][-1]

# Identify the ARMA model orders p and q
plot_acf(cpitrain,use_vlines=True,lags=30)
plot_pacf(cpitrain,use_vlines=True,lags=30)

model1 = arima_model.ARIMA(cpitrain.values,order=(1, 0, 1)).fit()
model1.summary()

p = np.arange(1,4)
q = np.arange(1,4)
result = dict()
for i in p:
    for j in q:
        model1 = arima_model.ARIMA(cpitrain.values, order=(i, 0, j)).fit()
Example #22
import pandas as pd

#3.
CRSPday = pd.read_csv('Data/Part4/004/CRSPday.csv')
ibm = CRSPday.ibm
ibm.plot()

from statsmodels.graphics.tsaplots import *
plot_acf(ibm**2, lags=20)

from statsmodels.tsa import stattools
LjungBox = stattools.q_stat(stattools.acf(ibm**2)[1:13], len(ibm))
LjungBox[1][-1]

#4.
import pandas_datareader.data as web
import datetime as dt
google = web.DataReader('GOOGL', 'yahoo', dt.datetime(2004, 1, 1),
                        dt.datetime(2015, 12, 31))
google = google.asfreq('M', 'ffill', 'end')
googleRet = (google.Close - google.Close.shift(1)) / google.Close.shift(1)
googleRet = googleRet.dropna()

googleRet.plot()
plot_acf(googleRet, lags=20)
plot_pacf(googleRet, lags=20)

LjungBox = stattools.q_stat(stattools.acf(googleRet)[1:13], len(googleRet))
LjungBox[1][-1]

(googleRet**2).plot()
Example #23
                trend='c',
                method='aeg',
                maxlag=None,
                autolag='aic',
                return_results=None)
#return
#coint_t : float
#t-statistic of unit-root test on residuals
#pvalue : float
#MacKinnon’s approximate, asymptotic p-value based on MacKinnon (1994)
#crit_value : dict
#Critical values for the test statistic at the 1 %, 5 %, and 10 % levels based on regression curve. This depends on the number of observations
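# Sketch (not from the original): unpacking those three return values from
# statsmodels.tsa.stattools.coint for two hypothetical series y0 and y1:
#   coint_t, pvalue, crit_values = stattools.coint(y0, y1, trend='c', method='aeg', autolag='aic')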
'''Ljung-Box white-noise test'''
x = stattools.acf(array, nlags=40, adjusted=False, qstat=False, alpha=None)
nobs = len(array)
stattools.q_stat(x[1:], nobs)  # drop the lag-0 autocorrelation; the old 'type' argument no longer exists
# x: the autocorrelation coefficients under test; nobs: the sample size n used to compute x
# return
#array of the test statistics
#array of p-values; if p < 0.05, reject H0: the series is not white noise (autocorrelation is present)
# %%
# ================================================================
# 3. Regression
#=================================================================
# OLS ----------------------------
'''OLS'''
array1 = st.add_constant(array)
model = st.OLS(array_y, array1).fit()
print(model.summary())
print(model.fittedvalues)  # fit values
print(model.bic)  # bic
Example #24
logReturn=pd.Series((np.log(clprice))).diff().dropna()
logReturn.plot()

adf=ADF(logReturn,lags=6)
print(adf.summary().as_text())

plot_acf(logReturn,lags=20)
plot_pacf(logReturn,lags=20)
model1=arima_model.ARIMA(logReturn.values,order=(0,0,2)).fit()
model2=arima_model.ARIMA(logReturn.values,order=(2,0,0)).fit()
model1.aic
model2.aic

import math
stdresid=model2.resid/math.sqrt(model2.sigma2)
stdresid.plot()
plot_acf(stdresid,lags=20)
LjungBox=stattools.q_stat(stattools.acf(stdresid)[1:13],len(stdresid))
LjungBox[1][-1]

pd.Series(model2.forecast(10)[0]).plot()

Example #25
 def q_stat(self, timeseries):
     autocorrelation_coefs = stattools.acf(timeseries)
     result = stattools.q_stat(autocorrelation_coefs[1:], len(timeseries))  # q_stat requires nobs; drop the lag-0 autocorrelation
     QstatResult = namedtuple('QstatResult', 'statistic pvalue')
     return QstatResult(result[0], result[1])
Example #26
CPI=CPI.sort_index()
CPItrain=CPI[:-3]
CPItrain.tail(n=3)
CPItest = CPI[-3:]
CPItest


CPI.plot(title='CPI 2001-2014')


from arch.unitroot import ADF
CPItrain=CPItrain.dropna()
print(ADF(CPItrain,max_lags=10).summary().as_text())

from statsmodels.tsa import stattools
LjungBox=stattools.q_stat(stattools.acf(CPItrain)[1:12],len(CPItrain))
LjungBox[1][-1] 


from statsmodels.graphics.tsaplots import *
import matplotlib.pyplot as plt
# Split the figure into two panels
axe1=plt.subplot(121)
axe2=plt.subplot(122)
# Plot the series' ACF in the first panel
plot1=plot_acf(CPItrain,lags=30,ax=axe1)
# Plot the series' PACF in the second panel
plot2=plot_pacf(CPItrain,lags=30,ax=axe2)

from statsmodels.tsa import arima_model
#order gives the model orders; (1, 0, 1) builds an ARMA(1, 1) model;
Example #27
#model = sm.tsa.ARMA(y, (2, 1)).fit(trend='nc', disp=0)

#Descriptive TSA Statistics
stools.adfuller(y)
stools.kpss(y)
#Plot ACF and PACF
tplot.plot_acf(y)
tplot.plot_pacf(y)

#Fit ARMA Model
tsmodel = sm.tsa.ARMA(y, (2, 1)).fit(trend='nc', disp=0)

residuals = tsmodel.resid

stools.q_stat(stools.acf(residuals)[1:], nobs=len(residuals))  # q_stat expects autocorrelations, not the raw residuals

fig = plt.figure()
qq_ax = fig.add_subplot()
sm.qqplot(y, line='s', ax=qq_ax)
plt.show()

###############################################################################
#5. Load package datasets. Use data(faithful) to import the waiting time (in min)
#   between eruptions and the duration (in min) of the eruption for the Old Faithful
#   geyser in Yellowstone National Park, Wyoming, USA. We would like to forecast
#   when the next ejection would be.
#   • The length of the samples? How many variables?
#   • Perform unit root tests on the data. Is there unit-root in the data?
#   • Plot ACF and PACF to determine the appropriate order
#   • Fitting the time series model