def bestfit(x, y):
    """Return the element-wise midpoint of two series (simple 'best fit' spread)."""
    return (x + y) / 2


sample_size = 500

# Generate some data: a slow sine trend plus cumulative uniform noise.
x = np.arange(sample_size)
y1 = np.sin(x / 20)
y2 = y1 + np.random.uniform(size=sample_size)
y3 = y2 + np.random.uniform(size=sample_size)
yr = np.random.uniform(size=sample_size)

bf = bestfit(y2, y3)

# Fit an AR(1) to the spread; coef[0] is the AR coefficient, coef[1] the constant.
coef = am.ARIMA(bf, [1, 0, 0]).fit().params
tau = 252  # trading days per year
speedReversion = abs(coef[0]) / tau
equilibriumMean = coef[1] / (1 - coef[0])
SDcointResdiual = math.sqrt(
    2 * speedReversion * bf.var() / (1 - math.exp(-2 * speedReversion * tau)))
SDEq = SDcointResdiual / math.sqrt(2 * speedReversion)

# We are looking for a shortish halflife. > 50 days is too much
halflife = math.log(2) / speedReversion

# FIX: pd.rolling_mean() was removed in pandas 0.18+; wrap the ndarray in a
# Series and use the .rolling(...).mean() API instead.
short_mean = pd.Series(bf).rolling(5).mean()
long_mean = pd.Series(bf).rolling(20).mean()
def _seasonal_arima(self, endog=None, exog=None, p=None, d=None, q=None, imodels=None, include_holidays=None, ift_matrix=None, stepwise_fit=None, optimize=None):
    """
    Fit an ARIMA(p, d, q) model to `endog` with Fourier-transform-based
    exogenous regressors, appending the fitted result to `stepwise_fit`.

    :param pandas.DataFrame endog: endogenous time series to model
    :param pandas.DataFrame exog: exogenous pulses obtained through the Fourier
        transformation and/or a one-hot encoding of US holidays; overwritten (or
        extended with a 'fourier_feature' column) when imodels > 0
    :param int p: auto-regressive order passed to ARIMA
    :param int d: differencing order passed to ARIMA
    :param int q: moving-average order passed to ARIMA
    :param int imodels: index of the current model run; > 0 means use the
        exogenous series built from the imodels*2 most relevant frequencies
    :param bool include_holidays: whether holidays are part of `exog`
    :param list ift_matrix: list whose first element is the inverse Fourier
        transform of the series (only element [0] is read here)
    :param list stepwise_fit: accumulator list the fitted model is appended to
    :param bool optimize: flag set when called from hyperparameter optimization
        (not read in this body)
    :return: 0 on success
    :raises LADStructuralError: wrapping any exception raised by the ARIMA fit
    """
    import numpy as np
    import statsmodels.tsa.arima_model as arima
    # Extract the exogenous variable generated based on (imodels * 2) number of
    # most significant frequencies.
    if imodels > 0:
        fourier_exog = ift_matrix[0].reshape(-1, 1)[:, 0].reshape(-1, 1)
        if not include_holidays:
            # Replace exog outright with the (real part of the) Fourier feature.
            exog = np.real(fourier_exog)
        else:
            # Keep holiday columns; add the Fourier feature alongside them.
            exog['fourier_feature'] = np.float64(
                np.real(fourier_exog[:, 0]))
    # This check is required due to a bug in statsmodel arima which inflates the
    # predictions and std error for time series containing only 0's. Can be
    # removed if fixed in a later version of statsmodel or pyramid.
    if np.count_nonzero(endog) == 0:
        idx_max = len(endog) // 2
        idx = int(np.random.randint(0, idx_max, 1)[0])
        endog[idx] = abs(np.random.normal(0, 1e-3, 1)[0])
    try:
        # NOTE(review): several kwargs here (seasonal, trace, error_action,
        # stepwise_fit) look like pmdarima auto_arima options, not
        # statsmodels ARIMA.fit() parameters -- confirm they are intended.
        stepwise_fit.append(
            arima.ARIMA(endog=endog, exog=exog,
                        order=(p, d, q)).fit(seasonal=False, trace=False,
                                             method='css', solver='bfgs',
                                             error_action='ignore',
                                             stepwise_fit=True,
                                             warn_convergence=False,
                                             disp=False))
    except Exception as e:
        raise LADStructuralError(message=str(e))
    return 0
print(data.head())

# Hold out the last 5 rows as validation data.
train_data = data.iloc[:-5, :]
test_data = data.iloc[-5:, :]
# print(test_data)

# Order selection: search p, q up to len/10 and pick the pair with minimum BIC.
pmax = len(train_data['CWXT_DB:184:D:\\']) // 10
qmax = len(train_data['CWXT_DB:184:D:\\']) // 10
bic_matrix = []
for p in range(pmax):
    temp = []
    for q in range(qmax):
        # FIX: narrowed the bare `except:` so KeyboardInterrupt / SystemExit
        # are not silently swallowed during the grid search.
        try:
            result = arima_model.ARIMA(train_data['CWXT_DB:184:D:\\'],
                                       (p, 1, q)).fit().bic
            # NaN BICs are recorded as None so idxmin() ignores them.
            temp.append(None if pd.isna(result) else result)
        except Exception:
            temp.append(None)
    bic_matrix.append(temp)

bic_matrix = pd.DataFrame(bic_matrix)
print(bic_matrix)
print(pd.isna(bic_matrix))

# stack() flattens to a Series indexed by (p, q); idxmin() picks the best pair.
p, q = bic_matrix.stack().idxmin()
print(u'BIC最小的p值和q值为:%s、%s' % (p, q))
# -*- coding: UTF-8 -*-
"""Fit candidate ARMA models to a CPI series.

file: Quant24.py
date: 2019-10-17 15:43
author: Grey
"""
import pandas as pd
from arch.unitroot import ADF
from statsmodels.tsa import arima_model

# Load the CPI data and index it by timestamp.
CPI = pd.read_csv('data/024/CPI.csv', index_col='time')
CPI.index = pd.to_datetime(CPI.index)

# Skip the first three rows, then keep the non-missing CPI column for training.
CPItrain = CPI[3:]
CPItrain = CPItrain.dropna().CPI

# Candidate models: ARMA(1, 1) and ARMA(1, 2) (middle 0 = no differencing).
model1 = arima_model.ARIMA(CPItrain, order=(1, 0, 1)).fit()
model1.summary()
model2 = arima_model.ARIMA(CPItrain, order=(1, 0, 2)).fit()
model2.summary()
fig.savefig("timecpi.png")

# Stationarity: ADF unit-root test on the cleaned CPI training series.
CPITrain = CPITrain.dropna().CPI
print(ADF(CPITrain, max_lags=10).summary().as_text())

# White-noise check: Ljung-Box Q statistic on the first 11 autocorrelations.
LB = stattools.q_stat(stattools.acf(CPITrain)[1:12], len(CPITrain))
print(LB[1][-1])

# Model identification: ACF and PACF plots side by side.
fig = plt.figure()
axe1 = plt.subplot(121)
axe2 = plt.subplot(122)
plot1 = plot_acf(CPITrain, lags=30, ax=axe1)
plot2 = plot_pacf(CPITrain, lags=30, ax=axe2)
fig.savefig("model.png")

# Parameter estimation for several candidate ARMA(p, q) orders.
model1 = arima_model.ARIMA(CPITrain, order=(1, 0, 1)).fit()
print(model1.summary())
model2 = arima_model.ARIMA(CPITrain, order=(1, 0, 2)).fit()
print(model2.summary())
model3 = arima_model.ARIMA(CPITrain, order=(2, 0, 1)).fit()
model4 = arima_model.ARIMA(CPITrain, order=(2, 0, 2)).fit()
model5 = arima_model.ARIMA(CPITrain, order=(3, 0, 1)).fit()
model6 = arima_model.ARIMA(CPITrain, order=(3, 0, 2)).fit()

# Diagnostics: coefficient confidence intervals, then standardized residuals
# (residuals divided by the residual standard deviation) as a white-noise check.
print(model1.conf_int())
print(model6.conf_int())
stdresid = model6.resid / math.sqrt(model6.sigma2)
fig = plt.figure()
plt.plot(stdresid)
def ARIMA(series, n):
    """First-difference (d=1) ARIMA modelling and forecasting.

    series: the time series to model (any array-like; flattened internally).
    n: number of future points to forecast.
    Returns a dict holding the fitted model, stationarity / white-noise test
    results, the chosen (p, q) orders, coefficients, and the n-step forecast.
    Side effect: saves ACF / PACF plots of the differenced series to the
    current working directory.
    """
    series = np.array(series)
    series = pd.Series(series.reshape(-1))
    currentDir = os.getcwd()  # current working directory (plot destination)
    # First difference of the series.
    fd = series.diff(1)[1:]
    plot_acf(fd).savefig(currentDir + '/一阶差分自相关图.png')
    plot_pacf(fd).savefig(currentDir + '/一阶差分偏自相关图.png')
    # ADF unit-root test on the differenced series; [1] is the p-value.
    unitP = adfuller(fd)[1]
    if unitP > 0.05:
        unitAssess = '单位根检验中p值为%.2f,大于0.05,认为该一阶差分序列判断为非平稳序列' % (unitP)
    else:
        unitAssess = '单位根检验中p值为%.2f,小于0.05,认为该一阶差分序列判断为平稳序列' % (unitP)
    # Ljung-Box white-noise test at lag 1.
    # NOTE(review): acorr_ljungbox returns (statistic, pvalue); [-1] selects the
    # p-value array, so noiseP is array-like -- hence noiseP[0] in the return.
    noiseP = acorr_ljungbox(fd, lags=1)[-1]
    if noiseP <= 0.05:
        noiseAssess = '白噪声检验中p值为%.2f,小于0.05,认为该一阶差分序列为非白噪声' % noiseP
    else:
        noiseAssess = '白噪声检验中%.2f,大于0.05,认为该一阶差分序列为白噪声' % noiseP
    # Choose (p, q) by minimum BIC over a grid; orders rarely exceed length/10.
    pMax = int(series.shape[0] / 10)
    qMax = pMax
    bics = list()
    for p in range(pMax + 1):
        tmp = list()
        for q in range(qMax + 1):
            try:
                tmp.append(arima_model.ARIMA(series, (p, 1, q)).fit().bic)
            except Exception as e:
                # Huge penalty value so failed fits are never selected.
                tmp.append(1e+10)
        bics.append(tmp)
    bics = pd.DataFrame(bics)
    # stack() flattens to a Series indexed by (p, q); idxmin() picks the best.
    p, q = bics.stack().idxmin()
    # Fit the chosen model and forecast n steps ahead.
    model = arima_model.ARIMA(series, order=(p, 1, q)).fit()
    predict = model.forecast(n)[0]
    return {
        'model': {'value': model, 'desc': '模型'},
        'unitP': {'value': unitP, 'desc': unitAssess},
        'noiseP': {'value': noiseP[0], 'desc': noiseAssess},
        'p': {'value': p, 'desc': 'AR模型阶数'},
        'q': {'value': q, 'desc': 'MA模型阶数'},
        'params': {'value': model.params, 'desc': '模型系数'},
        'predict': {'value': predict, 'desc': '往后预测%d个的序列' % (n)}
    }
plt.plot(ac)
plt.grid()
plt.show()

# PACF (Partial Auto-Correlation Function) of the log-differenced series;
# together with the ACF plot above it guides the order choice below.
pac = pacf(dflog.dropna())
plt.plot(pac)
plt.grid()
plt.show()

# In[24]:
from statsmodels.tsa import arima_model

# ARIMA(p, d, q) = ARIMA(2, 1, 2):
#   p -- number of autoregressive terms,
#   d -- number of nonseasonal differences needed for stationarity,
#   q -- number of lagged forecast errors in the prediction equation.
arima = arima_model.ARIMA(df, (2, 1, 2))
model = arima.fit()

# In[25]:
model.plot_predict(start=1, end=204)

# In[ ]:
# %% AR Model: fit, in-sample predict, and rolling MSE.
ed_3h_ar = ed_3h_htm[['Ct']]
ar_mdl = ar_model.AR(ed_3h_ar)
ar_fit = ar_mdl.fit(maxlag=(WndwDys*DyStps))
ed_3h_ar['prediction'] = ar_fit.predict()
ed_3h_ar['MSE'] = MSE(ed_3h_ar.Ct, ed_3h_ar.prediction, WndwDys, DyStps)
ed_3h['AR'] = ed_3h_ar.prediction
ed_3h_mse['AR'] = ed_3h_ar.MSE

# %% ARMA model
ed_3h_arima = ed_3h_htm[['Ct']]
arima_mdl = arima_model.ARIMA(ed_3h_arima, (30, 1, 30))
arima_fit = arima_mdl.fit()
# FIX: predictions must come from the fitted results object; the original
# called .predict() on the DataFrame itself, which raises AttributeError.
ed_3h_arima['prediction'] = arima_fit.predict()
ed_3h_arima['MSE'] = MSE(ed_3h_arima.Ct, ed_3h_arima.prediction, WndwDys, DyStps)
ed_3h['ARIMA'] = ed_3h_arima.prediction
ed_3h_mse['ARIMA'] = ed_3h_arima.MSE

# %% Plot: convert the datetime index to matplotlib date numbers.
ed_3h['dates'] = [date2num(date) for date in ed_3h.index]
ed_3h_mse['dates'] = [date2num(date) for date in ed_3h_mse.index]
# Standardize the uber_sup column in place.
uber_scaler = StandardScaler().fit(df[["uber_sup"]])
df["uber_sup"] = uber_scaler.transform(df[["uber_sup"]])

# NOTE(review): the ARIMA block below is disabled -- it is wrapped in a bare
# string literal, so it never executes. The plotting code underneath still
# reads `arima.fittedvalues` and `uber_predict`, which only the disabled block
# defines, so running this chunk as-is raises NameError. Confirm whether the
# block should be re-enabled (its `print` statement is Python-2 syntax).
"""
# ARIMA (Time -> Uber).
train = df[df["timestamp"] <= threshold]["uber_sup"]
test = df[df["timestamp"] >= threshold - 86400]["uber_sup"]
# Resampling to get hourly data (otherwise exception).
uber_train = train.resample('H').ffill(). \
    reindex(pd.date_range(train.index[0], train.index[-1], freq = "H"))
uber_test = test.resample('H').ffill(). \
    reindex(pd.date_range(test.index[0], test.index[-1], freq = "H"))
# Fit model and predict.
arima = arima_model.ARIMA(uber_train, (15, 0, 5)).fit()
uber_predict = arima.predict(uber_test.index[0], uber_test.index[-1], dynamic=True)
mse = math.sqrt(np.mean((uber_predict - uber_test) / 24)**2)
print "Root Mean squared error: " + str(mse)
"""

# Plot figures.
fig_path = sys.path[0] + "/fig/time_alone_reg.png"
fig, ax = plt.subplots(figsize=(8, 1))
ax.set_xlabel("Time", fontsize = 10)
ax.set_ylabel("Value", fontsize = 10)
ax.plot(df["uber_data"], c = "r", label = "real")
ax.plot(arima.fittedvalues[:-24], c = "k", label = "train")
ax.plot(uber_predict, c = "b", label = "test")
ax.set_ylim([-2, 4])
# Ljung-Box Q test on the first 11 autocorrelations (checking m = 12 lags).
LjungBox = stattools.q_stat(stattools.acf(CPItrain)[1:12], len(CPItrain))
LjungBox[1][-1]

# Split the canvas in two: ACF on the left, PACF on the right.
axe1 = plt.subplot(121)
axe2 = plt.subplot(122)
plot1 = plot_acf(CPItrain, lags=30, ax=axe1)
plot2 = plot_pacf(CPItrain, lags=30, ax=axe2)

# order=(1, 0, 1) builds an ARMA(1, 1) model; the middle 0 means the raw,
# undifferenced data is used (zero differencing).
model1 = arima_model.ARIMA(CPItrain, order=(1, 0, 1)).fit()
model1.summary()

# Candidate models of other orders, for comparison.
model2 = arima_model.ARIMA(CPItrain, order=(1, 0, 2)).fit()
model2.summary()
model3 = arima_model.ARIMA(CPItrain, order=(2, 0, 1)).fit()
model4 = arima_model.ARIMA(CPItrain, order=(2, 0, 2)).fit()
model5 = arima_model.ARIMA(CPItrain, order=(3, 0, 1)).fit()
model6 = arima_model.ARIMA(CPItrain, order=(3, 0, 2)).fit()
model6.conf_int()

# Diagnostics: standardized residuals and their ACF.
stdresid = model6.resid / math.sqrt(model6.sigma2)
plt.plot(stdresid)
plot_acf(stdresid, lags=20)
# Autocorrelation and partial autocorrelation of the Shanghai return series.
acfs = stattools.acf(SH_ret)
pacfs = stattools.pacf(SH_ret)
plot_acf(SH_ret, use_vlines=True, lags=30)

# Plot the returns and the close-price level.
SH_ret.plot()
plt.title('return')
SH_close.plot()
plt.title('close price')

# ADF unit-root tests: returns vs. close prices.
adfSH_ret = ADF(SH_ret)
print(adfSH_ret)
adfSH_close = ADF(SH_close)
print(adfSH_close)

# Ljung-Box white-noise test on the return autocorrelations.
LjungBox_ret = stattools.q_stat(acfs, len(SH_ret))
LjungBox_ret[1][-1]

HS300_data['close']['2010-01-01':'2016-12-31']
HS300_data['2010-01-01':'2016-12-31']

# Fit ARMA(2, 1) on the returns, then run an information-criterion order search.
model1 = arima_model.ARIMA(SH_ret, order=(2, 0, 1)).fit()
model1.summary()
stattools.arma_order_select_ic(SH_ret, max_ma=4)
def get_arima_coefficients(series, order=(2, 1, 3)):
    """Fit an ARIMA model of the given (p, d, q) order to `series`.

    disp=False suppresses the solver's convergence output.
    Returns the fitted parameter vector (`params`).
    """
    fitted = arima_model.ARIMA(series, order).fit(disp=False)
    return fitted.params
# (Commented-out experiments with sm.tsa.ARMA on the sunspots dataset and the
# private _arima_predict_out_of_sample helper were removed; only the active
# fit remains.)

# Fit ARIMA(3, 0, 2) on the first column and plot a dynamic forecast over
# steps 3000-3900.
model = ari.ARIMA(data[:, 0], order=(3, 0, 2))
ar_res = model.fit()
preds = ar_res.predict(3000, 3900, dynamic=True)
plt.plot(preds)
plt.show()
# Grid-search ARIMA orders on the mood series with exogenous regressors,
# keeping the configuration with the lowest out-of-sample MSE.
endo = a['mood_mean']
P = [1, 2, 4, 6]
D = range(0, 2)
Q = range(0, 2)
a1 = a  # NOTE(review): alias, not a copy -- writing a1['pred'] mutates `a`.
best_score, best_cfg, best_predict, best_aic = float(
    'inf'), None, pd.DataFrame(), float('inf')
for p in P:
    for d in D:
        for q in Q:
            orders = (p, d, q)
            exog = a[ex_terms]
            endo = a['mood_mean']
            try:
                # order is the (p, d, q) tuple; freq='d' marks daily data.
                model = stat.ARIMA(
                    endog=endo, exog=exog, freq='d', order=orders)
                model_ = model.fit(disp=0)
                a1['pred'] = model_.predict(start=split_point, exog=exog,
                                            dynamic=True)
                aic = model_.aic
                # Fill NaN with the mean first so the values can be
                # inverse-transformed back to the original scale.
                a1.fillna(
                    a1.mean(), inplace=True
                )
                truth = a['mood_mean'].iloc[split_point:length]
                predict = a1['pred'].iloc[split_point:length]
                MSE = mean_squared_error(truth, predict)
                if MSE < best_score:
                    # NOTE(review): this chunk is truncated mid-statement
                    # below; the remainder is outside the visible source.
                    best_predict = model_.predict(start=split_point,
                                                  exog=exog,
def many_paras():
    """Walk through the standard time-series toolbox on SHRet / CPItrain.

    Covers: ACF/PACF estimation and plots, the ADF unit-root test, Ljung-Box
    Q statistics, ARMA(1, 1) fitting with residual diagnostics, a 3-step
    forecast, and an ARCH fit on the returns. Reads module-level data
    (SHRet, CPItrain, SHret).

    Background notes (condensed from the original):
      * p-value controls the false positive rate (FPR); q-value controls
        the false discovery rate FDR = E(V/R).
      * Ljung-Box: Qlb = T*(T+2) * sum_{j=1..p} r_j^2 / (T-j), with r_j the
        lag-j residual autocorrelation, T observations, p lags.
      * ACF measures corr(x_t, x_{t-h}); PACF conditions on the lags between.
      * ARMA(p, q) combines an AR(p) and an MA(q) process; ARIMA adds
        differencing on top of ARMA.
      * ADF tests for a unit root; a unit root means non-stationarity.
      * SMA / WMA / EMA / EWMA are the usual moving-average families.
      * OBV: net long/short ratio = [(C-L)-(H-C)] / (H-L) * V.
    """
    # 1. Autocorrelation coefficients and plot.
    acfs = stattools.acf(SHRet)
    plot_acf(SHRet, use_vlines=True, lags=30)
    # 2. Partial autocorrelation coefficients and plot.
    pacfs = stattools.pacf(SHRet)
    plot_pacf(SHRet, use_vlines=True, lags=30)
    # 3. ADF unit-root test, printed as text.
    adfSHRet = ADF(SHRet)
    print(adfSHRet.summary().as_text())
    # 4. Ljung-Box Q statistics over the first 12 autocorrelations.
    LjungBox1 = stattools.q_stat(stattools.acf(SHRet)[1:13], len(SHRet))
    print(LjungBox1)
    # 5. Same test on the CPI training series (m = 12 lags).
    LjungBox = stattools.q_stat(stattools.acf(CPItrain)[1:12], len(CPItrain))
    # ARMA(1, 1) on the raw (undifferenced) CPI data: order=(1, 0, 1).
    model1 = arima_model.ARIMA(CPItrain, order=(1, 0, 1)).fit()
    model1.summary()
    model1.conf_int()
    # 6. Residual diagnostics: standardized residuals, their ACF and Q test,
    # followed by a 3-step-ahead forecast.
    stdresid = model1.resid / math.sqrt(model1.sigma2)
    plt.plot(stdresid)
    plot_acf(stdresid, lags=20)
    LjungBox = stattools.q_stat(stattools.acf(stdresid)[1:13], len(stdresid))
    print(LjungBox[1][-1])
    print(model1.forecast(3)[0])
    # 7. ARCH (autoregressive conditional heteroskedasticity) fit on returns:
    #    y(t) = b*x(t) + eps(t), with eps(t)^2 following an autoregression.
    am = arch_model(SHret)
    model = am.fit(update_freq=0)
    print(model.summary())
    # 8. (section continues in the next chunk)
# FIX: this chunk began with bare text ("对子 的 处理" -- pair handling) that
# was not a comment and made the file a SyntaxError; it is now a comment.
# 8. Pair handling: distance (SSD) and cointegration spreads.
pt = TradeTool()
SSD = pt.SSD(priceAf, priceBf)
SSDspread = pt.SSDSpread(priceAf, priceBf)
SSDspread.describe()
coefficients = pt.cointegration(priceAf, priceBf)
CoSpreadF = pt.CointegrationSpread(priceA, priceB, formPeriod, formPeriod)
CoSpreadTr = pt.CointegrationSpread(priceA, priceB, formPeriod, tradePeriod)
CoSpreadTr.describe()
bound = pt.calBound(priceA, priceB, 'Cointegration', formPeriod, width=1.2)

# 9. Pair trade simulation.
trtl = TradeTool()
account = trtl.TradeSimPair(PAt, PBt, position)

# 10. Momentum function: sign of the 35-day momentum becomes the signal.
et = ElementTool()
et.momentum(Close, 5).tail(n=5)
momen35 = et.momentum(Close, 35)
signal = []
for i in momen35:
    if i > 0:
        signal.append(1)
    else:
        signal.append(-1)
signal = pd.Series(signal, index=momen35.index)
signal.head()
tradeSig = signal.shift(1)
ret = Close / Close.shift(1) - 1
# ret=ret['2014-02-20':]
# ret.head(n=3)
Mom35Ret = ret * (signal.shift(1))
Mom35Ret[0:5]
real_Mom35Ret = Mom35Ret[Mom35Ret != 0]
real_ret = ret[ret != 0]

# RSI strategy: rsi6 catches overbought (>80, sell) / oversold (<20, buy).
Rsi12 = et.rsi(BOCMclp, 12)
rsi6 = et.rsi(BOCMclp, 6)
rsi24 = et.rsi(BOCMclp, 24)
Sig1 = []
for i in rsi6:
    if i > 80:
        Sig1.append(-1)
    elif i < 20:
        Sig1.append(1)
    else:
        Sig1.append(0)
date1 = rsi6.index
Signal1 = pd.Series(Sig1, index=date1)
Signal1[Signal1 == 1].head(n=3)
Signal1[Signal1 == -1].head(n=3)

# Crossover of rsi6 vs rsi24 gives a second signal (golden/death cross).
Signal2 = pd.Series(0, index=rsi24.index)
lagrsi6 = rsi6.shift(1)
lagrsi24 = rsi24.shift(1)
for i in rsi24.index:
    if (rsi6[i] > rsi24[i]) & (lagrsi6[i] < lagrsi24[i]):
        Signal2[i] = 1
    elif (rsi6[i] < rsi24[i]) & (lagrsi6[i] > lagrsi24[i]):
        Signal2[i] = -1

# Combine the signals, clip to {-1, 0, 1}, lag one bar for tradability.
signal = Signal1 + Signal2
signal[signal >= 1] = 1
signal[signal <= -1] = -1
signal = signal.dropna()
tradSig = signal.shift(1)

# Strategy performance comparison.
tt = TradeTool()
BuyOnly = tt.strategy_analy(buy, ret)
SellOnly = tt.strategy_analy(sell, ret)
Trade = tt.strategy_analy(tradSig, ret)
Test = pd.DataFrame({
    "BuyOnly": BuyOnly,
    "SellOnly": SellOnly,
    "Trade": Trade
})

# Cumulative returns: buy-and-hold vs. the strategy.
cumStock = np.cumprod(1 + ret) - 1
cumTrade = np.cumprod(1 + tradeRet) - 1
# 12. (section continues in the next chunk)
# FIX: this chunk began with bare text ("移动平均线" -- moving average) that
# was not a comment and made the file a SyntaxError; it is now a comment.
# 12. Moving averages: simple, weighted, exponential, exp-weighted.
sma5 = et.smaCal(Close, 5)
wma5 = et.wmaCal(Close, w)
Ema = et.emaCal(Close, period)
print(Ema)
Ewma = et.ewmaCal(Close, 5, 0.2)

# 13. Bollinger bands (20-day window, 2 standard deviations), then a risk
# check across several band multipliers for 2010 prices.
UnicomBBands = et.bbands(Close, 20, 2)
print(UnicomBBands)
multiplier = [1, 1.65, 1.96, 2, 2.58]
price2010 = Close['2010-01-04':'2010-12-31']
tt.CalBollRisk(price2010, multiplier)

# 14. Strategy performance.
btt = BackTestTool()
Performance1 = btt.perform(Close, tradSignal1)
print(Performance1)

# 15. Trade and backtest on the KD signal.
KDtrade = btt.trade(KDSignal, close)
btt.backtest(KDtrade.Ret, KDtrade.KDtradeRet)

# 16. Up/down breaks of K through D ("golden cross" / "death cross").
KDupbreak = et.upbreak(KValue, DValue) * 1
KDupbreak[KDupbreak == 1].head()
KDdownbreak = et.downbreak(KValue, DValue) * 1
KDdownbreak[KDdownbreak == 1].head()
# Backtest of the cross-based trading strategy.
btt.backtest(KDbreak.Ret, KDbreak.KDbreakRet)

# 17. Volume indicators: cumulative up/down volume blocks.
cumUpVol = et.VOblock(UpVol)
cumDownVol = et.VOblock(DownVol)
ALLVol = np.array([cumUpVol, cumDownVol]).transpose()

# 18. Holding decision from the trade signal.
hold = tt.judge_hold(trade)

# 19. Single-asset trade simulation.
TradeAccount = tt.TradeSim(close, hold)
print(TradeAccount)
plot_acf(clprice, lags=20)

from arch.unitroot import ADF

# ADF unit-root test on the raw close price.
adf = ADF(clprice, lags=6)
print(adf.summary().as_text())

# Log returns (difference of log prices); re-check stationarity and
# autocorrelation structure.
logReturn = pd.Series((np.log(clprice))).diff().dropna()
logReturn.plot()
adf = ADF(logReturn, lags=6)
print(adf.summary().as_text())
plot_acf(logReturn, lags=20)
plot_pacf(logReturn, lags=20)

from statsmodels.tsa import arima_model

# Compare MA(1) and AR(1) fits on the log returns.
model1 = arima_model.ARIMA(logReturn.values, order=(0, 0, 1)).fit()
model1.summary()
model2 = arima_model.ARIMA(logReturn.values, order=(1, 0, 0)).fit()
model2.summary()

#8. Repeat the same pipeline on the baiyun dataset.
baiyun = zgsy = pd.read_csv('Data/Part4/003/baiyun.csv', index_col='Date')
baiyun.index = pd.to_datetime(baiyun.index)
clprice = baiyun.Close
logReturn = pd.Series((np.log(clprice))).diff().dropna()
logReturn.plot()
adf = ADF(logReturn, lags=6)
print(adf.summary().as_text())
# Fetch CPI data and index it by month.
cpi = ts.get_cpi()
cpi.index = pd.to_datetime(cpi['month'])
cpi = cpi['cpi']
# Slice runs newer-to-older -- presumably the index from ts.get_cpi() is in
# descending date order (verify against the data source).
cpitrain = cpi['2016-01-01':'2000-01-01']
cpitrain.plot()

# Stationarity: ADF unit-root test.
print(ADF(cpitrain, max_lags=10).summary().as_text())

# White noise: Ljung-Box Q test on the first 11 autocorrelations.
ljb0 = stattools.q_stat(stattools.acf(cpitrain)[1:12], len(cpitrain))
ljb0[1][-1]

# Identify ARMA orders p, q from ACF / PACF plots.
# FIX: 'use_vlineEs' was a keyword typo that raises TypeError at runtime;
# the correct keyword is 'use_vlines'.
plot_acf(cpitrain, use_vlines=True, lags=30)
plot_pacf(cpitrain, use_vlines=True, lags=30)

model1 = arima_model.ARIMA(cpitrain.values, order=(1, 0, 1)).fit()
model1.summary()

# Grid search p, q in 1..3, recording (AIC, BIC) for each candidate fit.
p = np.arange(1, 4)
q = np.arange(1, 4)
result = dict()
for i in p:
    for j in q:
        model1 = arima_model.ARIMA(cpitrain.values, order=(i, 0, j)).fit()
        result[(i, j)] = (model1.aic, model1.bic)

# Chosen model and residual diagnostics (standardized residuals + Q test).
bestmodel = arima_model.ARIMA(cpitrain.values, order=(3, 0, 2)).fit()
stdresid = bestmodel.resid / math.sqrt(bestmodel.sigma2)
plt.plot(stdresid)
plot_acf(stdresid, lags=12)
ljb = stattools.q_stat(stattools.acf(stdresid)[1:12], len(stdresid))
def ARIMA(series, n, name):
    """First-difference (d=1) ARIMA modelling and n-step forecasting.

    series: the time series to model (flattened internally).
    n: number of future points to forecast.
    name: label intended for plot file names (plot saving is disabled below).
    Returns a dict with the model, test results, chosen (p, q), and forecast.
    NOTE(review): the returned 'model' entry is the UNFITTED model object;
    the fitted results live only in the local `result` -- confirm intent.
    """
    series = np.array(series)
    series = pd.Series(series.reshape(-1))
    currentDir = os.getcwd()  # current working directory
    # First difference of the series.
    fd = series.diff(1)[1:]
    #plot_acf(fd).savefig('./'+ name +'一阶差分自相关图.png')
    #plot_pacf(fd).savefig('./'+ name +'一阶差分偏自相关图.png')
    # ADF unit-root test on the differenced series; [1] is the p-value.
    unitP = adfuller(fd)[1]
    if unitP > 0.05:
        unitAssess = '单位根检验中p值为%.2f,大于0.05,认为该一阶差分序列判断为非平稳序列' % (unitP)
    else:
        unitAssess = '单位根检验中p值为%.2f,小于0.05,认为该一阶差分序列判断为平稳序列' % (unitP)
    # Ljung-Box white-noise test; returns array-like p-values, hence
    # noiseP[0] in the return dict.
    noiseP = acorr_ljungbox(fd, lags=1)[-1]
    if noiseP <= 0.05:
        noiseAssess = '白噪声检验中p值为%.2f,小于0.05,认为该一阶差分序列为非白噪声' % noiseP
    else:
        noiseAssess = '白噪声检验中%.2f,大于0.05,认为该一阶差分序列为白噪声' % noiseP
    # Fixed small (p, q) grid; choose the pair with minimum BIC.
    pMax = 3
    qMax = pMax
    bics = list()
    for p in range(pMax + 1):
        tmp = list()
        for q in range(qMax + 1):
            try:
                tmp.append(arima_model.ARIMA(series, (p, 1, q)).fit().bic)
            except Exception as e:
                # Huge penalty value so failed fits are never selected.
                tmp.append(1e+10)
        bics.append(tmp)
    bics = pd.DataFrame(bics)
    p, q = bics.stack().idxmin()
    print('BIC准则下确定p,q为%s,%s' % (p, q))
    # Fit the chosen model and forecast n steps ahead.
    model = arima_model.ARIMA(series, order=(p, 1, q))
    result = model.fit()
    print(result.summary())
    predict = result.forecast(n)[0]
    # (disabled) alternative fit/plots via pyflux:
    #model_a = pf.ARIMA(d1[s:e],ar=p,ma=q,integ=0)
    #x= model_a.fit()
    #model_a.plot_fit()
    #model_a.plot_predict(h=20,past_values=50)
    return {
        'model': {'value': model, 'desc': '模型'},
        'unitP': {'value': unitP, 'desc': unitAssess},
        'noiseP': {'value': noiseP[0], 'desc': noiseAssess},
        'p': {'value': p, 'desc': 'AR模型阶数'},
        'q': {'value': q, 'desc': 'MA模型阶数'},
        'predict': {'value': predict, 'desc': '往后预测%d个的序列' % (n)}
    }
file_name = './temp/discdata_processed.xls'
# file_name = './data/discdata_processed.xls'
data = pd.read_excel(file_name, index_col='COLLECTTIME')

# Hold out the last five rows for validation.
train_data = data.iloc[:-5, :]
test_data = data.iloc[-5:, :]
# print(test_data)
xdata = train_data['CWXT_DB:184:D:\\']

# Upper bounds for a (p, q) order search (length / 10 rule of thumb).
pmax = len(xdata) // 10
qmax = len(xdata) // 10

# Fit ARIMA(0, 1, 1) and predict on the level (undifferenced) scale.
arima = arima_model.ARIMA(xdata, (0, 1, 1)).fit()
xdata_pred = arima.predict(typ='levels')

# Residuals, then a Ljung-Box white-noise check on them (12 lags).
pred_error = (xdata_pred - xdata).dropna()
lb, p = diagnostic.acorr_ljungbox(pred_error, lags=12)
h = (p < 0.05).sum()
if h > 0:
    print(u'模型ARIMA(0,1,1)不符合白噪声检验')
else:
    print(u'模型ARIMA(0,1,1)符合白噪声检验')

# Forecast the next five days.
test_pred = arima.forecast(5)[0]
def ts_fit_arima(ts, order):
    """Fit an ARIMA model of the given (p, d, q) order to the series `ts`.

    disp=0 silences the solver's convergence output.
    Returns the fitted results object.
    """
    model = arima.ARIMA(ts, order=order)
    return model.fit(disp=0)
# Scan the first k rows for the lowest-AIC ARIMA(i, d, j) model.
df_scan = df_new.iloc[:k]
best_aic = np.inf
best_order = None
best_mdl = None

pq_rng = range(4)  # p, q in [0, 1, 2, 3]
d_rng = range(2)   # d in [0, 1]
for i in pq_rng:
    for d in d_rng:
        for j in pq_rng:
            # FIX: narrowed the bare `except:` so KeyboardInterrupt /
            # SystemExit are not swallowed by the grid search.
            try:
                tmp_mdl = smt.ARIMA(df_scan, order=(i, d, j)).fit(
                    method='mle', trend='nc', disp=0)
                tmp_aic = tmp_mdl.aic
                if tmp_aic < best_aic:
                    best_aic = tmp_aic
                    best_order = (i, d, j)
                    best_mdl = tmp_mdl
            except Exception:
                continue

# No candidate converged: count this window as a failure.
if best_mdl is None:
    temp = temp + 1
def fit_row(self, row):
    """Fit an ARIMA(self.p, self.d, self.q) model to one row of data.

    Returns a (prediction, params) tuple: the in-sample predictions and the
    fitted parameter vector.
    """
    fitted = arima_model.ARIMA(row, (self.p, self.d, self.q)).fit()
    return (fitted.predict(), fitted.params)
# plt.plot(X) sm.graphics.tsa.plot_acf(X) visual_stable(X) visual_stable(X, 1) visual_stable(X, 2) d = 1 # Determine p, q sm.graphics.tsa.plot_acf(np.diff(X, n=d)) sm.graphics.tsa.plot_pacf(np.diff(X, n=d)) # Fit with ARIMA(p, d, q) model = ARIMA_model.ARIMA(X, (1, d, 3)) result = model.fit() print(result.summary()) # Residual test # Note that the prediction loses d of data, so the original data # should drop first-d elements to match. residual = np.delete(X, range(d)) - result.predict(typ='levels') result.plot_predict() _, Q, p = stattools.acf(residual, qstat=True) Q6, Q12 = Q[5], Q[12] p6, p12 = p[5], p[12] if p6 < 0.05 or p12 < 0.05: sad('Bad! Not a white noise, test failed') if p6 >= 0.05: