def stationary_test(self, test_df, plot=False):
    """Run ACF, PACF and ADF diagnostics on the ``'return'`` column.

    The first entry of ``test_df['return']`` is skipped before any statistic
    is computed.

    :param test_df: table-like object indexable by ``'return'``.
    :param plot: when truthy, draw the ACF and PACF correlograms (10 lags)
        stacked in one white-background figure and show it.
    :return: dict with keys ``'acf'``, ``'pacf'`` and ``'adf'`` holding the
        results of ``stattools.acf``, ``stattools.pacf`` and
        ``unitroot_adf`` respectively.
    """
    n_lags = 10
    returns = test_df['return'][1::]

    # Statistics are computed first, in the same order as before
    # (ACF, PACF, ADF), regardless of whether anything is plotted.
    results = {
        'acf': stattools.acf(returns, nlags=n_lags),
        'pacf': stattools.pacf(returns, nlags=n_lags),
        'adf': unitroot_adf(returns),
    }

    if not plot:
        return results

    # Two stacked panels: ACF on top, PACF below.
    figure = plt.figure(facecolor='white')
    upper_axis = figure.add_subplot(211)
    plot_acf(returns, lags=n_lags, ax=upper_axis)
    lower_axis = figure.add_subplot(212)
    plot_pacf(returns, lags=n_lags, ax=lower_axis)
    plt.show()
    return results
def baseline(self):
    """Plot ACF/PACF of recent SH000300 log returns and print the ADF p-value.

    Reads ``trade_date`` and ``close_price`` for ``'SH000300'`` through
    ``self.mysql.select_values``, indexes the frame by the parsed trade
    date, derives the log of each close divided by the previous close, and
    analyses the last 200 rows of that series:

    * ``acf_pacf_plot`` is called with 40 lags;
    * element 1 of the ``unitroot_adf`` result is printed (the p-value,
      per the statsmodels ADF return layout).

    :return: None
    """
    import numpy as np
    import pandas as pd
    from dev_global.env import TIME_FMT
    from statsmodels.stats.diagnostic import unitroot_adf

    df = self.mysql.select_values('SH000300', 'trade_date,close_price')

    # Data cleaning.
    df.columns = ['trade_date', 'close_price']
    # pd.to_datetime returns a new object instead of converting in place, so
    # the result has to be assigned back for the index to hold datetimes.
    df['trade_date'] = pd.to_datetime(df['trade_date'], format=TIME_FMT)
    df.set_index('trade_date', inplace=True)

    # Data construction: log(close_t / close_{t-1}).
    df['shift'] = df['close_price'].shift(1)
    df['amplitude'] = df['close_price'] / df['shift']
    df['ln_amplitude'] = np.log(df['amplitude'])
    # The first row has no previous close, so it is NaN and gets dropped.
    df.dropna(inplace=True)

    # Only the most recent 200 observations are analysed.
    input_df = df['ln_amplitude'][-200:]
    acf_pacf_plot(input_df, lags=40)

    result = unitroot_adf(input_df)
    print(result[1])
def adf_(timeseries):
    """Run the ADF stationarity test and return its statistic and 10% bound.

    :param timeseries: time series to analyse
    :return: tuple ``(adf_test_value, adfuller_critical_value)`` where the
        first item is element 0 of the ``unitroot_adf`` result and the
        second is the ``'10%'`` entry of element 4 (the critical-value
        mapping); comparing the two is how the caller decides whether the
        series is stationary.
    """
    adf_test = unitroot_adf(timeseries)
    adf_test_value = adf_test[0]
    # Read the 10% critical value straight from the mapping; building a
    # one-row DataFrame just to index it back out yields the same number.
    adfuller_critical_value = adf_test[4]['10%']
    return adf_test_value, adfuller_critical_value
def seasonality_price_decomp(self, test_df, f=0, plot=False):
    """Additively decompose the ``'close'`` series and ADF-test the returns.

    :param test_df: table-like object providing ``'close'``, ``'timestamp'``
        and ``'return'`` columns.
    :param f: value forwarded to ``seasonal_decompose`` as ``freq``; the
        default ``0`` is rejected.
    :param plot: when truthy, show the observed, trend, seasonal and
        residual series in four stacked panels.
    :return: dict with keys ``'trend'``, ``'seasonal'``, ``'residual'`` and
        ``'adf'``.
    :raises ValueError: if ``f`` is ``0``, or if the first two timestamps
        are exactly 60 apart (treated as minute data).
    """
    if f == 0:
        raise ValueError('\nError freqency input!! \n')

    obv = test_df['close']
    timestamps = test_df['timestamp']
    if timestamps[1] - timestamps[0] == 60:
        raise ValueError('\n Wrong using minute data! \n')

    decomposition = seasonal_decompose(obv, freq=f, model='additive')
    trend = decomposition.trend
    seasonal = decomposition.seasonal
    residual = decomposition.resid

    # The first 'return' entry is skipped (original author's note:
    # 'return[0]' is NaN).
    adf_result = unitroot_adf(test_df['return'][1::])

    if plot:
        plt.figure(figsize=(15, 10))
        panels = (
            ('obv', obv),
            ('trend', trend),
            ('seasonal', seasonal),
            ('residual', residual),
        )
        # Subplot codes 411..414: four rows, one column.
        for position, (label, series) in enumerate(panels, start=411):
            plt.subplot(position)
            plt.plot(series, label=label, lw=0.7)
            plt.legend(loc='best')
        plt.show()

    # NOTE(review): the message below talks about the residual, but the ADF
    # statistic is computed on the 'return' column - confirm which series
    # was meant.
    print('stats =', adf_result[0], 'Alpha =', adf_result[4])
    if adf_result[0] < adf_result[4]['1%']:
        print('\nResidual is stable in 99% confid. interval')

    return {
        'trend': trend,
        'seasonal': seasonal,
        'residual': residual,
        'adf': adf_result,
    }
## 平稳性检验 # Method 1: time series plot fig, ax = plt.subplots() fig.set_size_inches(9, 3.5) ax.plot(df['Date'], df['Close']) ax.xaxis.set_major_locator(matplotlib.ticker.MultipleLocator(90)) plt.xticks(rotation=90) ax.tick_params(labelsize=7) plt.show() # Method 2: calculate and plot ACF and PACF n_lag = 100 ###lag acf = stattools.acf(df['Close'], nlags=n_lag) #Autocorrelation Coefficient pacf = stattools.pacf(df['Close'], nlags=n_lag) print('Autocorrelation Coefficient (ACF): \n{}'.format(acf)) print('Partial Autocorrelation Coefficient (PACF): \n{}'.format(pacf)) sm.graphics.tsa.plot_acf(df['Close'], lags=n_lag) plt.show() # 阴影部分是置信区间。默认情况下,置信区间被设置为95%。 sm.graphics.tsa.plot_pacf(df['Close'], lags=n_lag) plt.show() # 单位根检验(这里采用ADF检验,分别用两种方法进行,第二种的输出效果较好) from statsmodels.stats.diagnostic import unitroot_adf adf_method1 = unitroot_adf(df['Close']) print('ADF method 1: \n{}'.format(adf_method1)) from arch.unitroot import ADF adf_method2 = ADF(df['Close']) print('ADF method 2: \n{}'.format(adf_method2))
# Plot `means` against s.index as a thick green line, with `sigmas` drawn as
# semi-transparent error bars underneath.
plt.figure(figsize=(8, 5))
plt.errorbar(s.index, means, yerr=sigmas, alpha=0.5)
plt.plot(s.index, means, 'g', linewidth=4)
plt.show()

# Stationarity check
# Put the dates (from position `days` onward) and the scores side by side as
# the 'date' and 'score' columns of one frame.
df = pd.concat([pd.DataFrame(date_range[days:],columns=['date']),
                pd.DataFrame(score,columns=['score'])], axis=1)
from statsmodels.tsa import stattools
from statsmodels.stats.diagnostic import unitroot_adf
print(unitroot_adf(df.score))
#plt.stem(stattools.acf(df.score));
k = stattools.pacf(df.score)
# NOTE(review): the lag-1 PACF value is overwritten with a hard-coded 0.003
# before plotting, so the stem plot does not show the computed value at
# lag 1 - confirm this is intentional.
k[1] = 0.003
plt.stem(k);

# Draw the peaks and troughs
data = means.reshape((len(means),))  # flatten to 1-D
# sign(diff) is the slope direction; a change from +1 to -1 shows up as -2
# in the second diff, which marks a local maximum. The +1 shifts the
# position from the double-diff array back to an index into `data`.
doublediff = np.diff(np.sign(np.diff(data)))
peak_locations = np.where(doublediff == -2)[0] + 1
# Drop the last detected peak.
# NOTE(review): no reason for discarding it is visible here - confirm.
peak_locations = peak_locations[:(len(peak_locations)-1)]
# Same detection on the negated series, so local minima are found.
doublediff2 = np.diff(np.sign(np.diff(-1*data)))
trough_locations = np.where(doublediff2 == -2)[0] + 1
# For every code in `codelist`: load its workbook, write a profiling report,
# then run ADF tests and draw a normalised comparison plot for each column.
# `i`, `c`, `stage` and `para` come from the enclosing scope, which is not
# visible in this fragment.
for j in codelist:
    # Workbook path is <para.path_data><j>_<i>.xlsx
    datas_filename = para.path_data + '%s' % j + '_' + '%s' % i + '.xlsx'
    datas = pd.read_excel(datas_filename, index_col=0, parse_dates=True)
    datas_minus_signal = pd.DataFrame(datas[datas.signal == c],columns = datas.columns)
    # NOTE(review): this assignment replaces the signal-filtered frame built
    # on the previous line with all rows of `datas` (columns from position 2
    # onward), so the `signal == c` filter has no effect - confirm intent.
    datas_minus_signal = pd.DataFrame(datas.iloc[:,2:])
    # step 1: descriptive-statistics report
    profile = datas_minus_signal.profile_report(
        title='%s' % j + '_' + '%s' % i + '_' + '%s' % stage+ ' Exploratory Data Analysis')
    profile.to_file(
        output_file=para.path_results + '%s' % j + '_' + '%s' % i + '_' + '%s' % stage+ 'Exploratory Data Analysis.html')
    # NOTE(review): `period` and `dict_roll` are not used anywhere in the
    # visible part of this loop.
    period = 15
    dict_roll = {}
    # Column 0 is skipped here; it is used below as the reference series.
    for k in range(1, datas_minus_signal.columns.size):
        columns_name = datas_minus_signal.columns[k]
        # step 2: time-series check: unit-root test
        # Tested once on the column itself and once on its first difference.
        adf_result = unitroot_adf(datas_minus_signal.iloc[:, k].dropna())
        adf_result_diff = unitroot_adf(datas_minus_signal.iloc[:, k].diff().dropna())
        # Each line prints the test statistic next to the 5% critical value;
        # the second line reports the differenced series.
        print('时间序列检验:单位根检验: %s' % j + '_%s' % i+ '_' + '%s' % stage,
              columns_name, round(adf_result[0], 4), round(adf_result[4]['5%'], 4))
        print('时间序列检验:单位根检验: %s' % j + '_%s' % i+ '_' + '%s' % stage,
              columns_name, '同比', round(adf_result_diff[0], 4),
              round(adf_result_diff[4]['5%'], 4))
        # step 3: plotting
        # Min-max scale column 0 (y) and column k (X) to [0, 1] so both can
        # share one axis.
        y = pd.DataFrame(datas_minus_signal.iloc[:, 0]).apply(
            lambda x: (x - np.min(x)) / (np.max(x) - np.min(x)))
        X = pd.DataFrame(datas_minus_signal.iloc[:, k]).apply(
            lambda x: (x - np.min(x)) / (np.max(x) - np.min(x)))
        # NOTE(review): as laid out here `figure_count` is reset to 1 on
        # every pass, so plt.figure(1) is selected each time - confirm
        # whether the counter was meant to be initialised outside the loop.
        figure_count = 1
        plt.figure(figure_count)
        figure_count += 1
        plt.plot(y, 'k-', label='%s' % j+ '_' + '%s' % stage)
        plt.plot(X, 'b-', label='%s' % i + '_' + '%s' % stage+ '%s' % columns_name)
# Title, save and show the current figure (the plotting calls that draw it
# are above this fragment).
plt.title("上证指数的收益率序列")
plt.savefig("plot_images/上证收益率序列.png")
plt.show()

# Get the summary statistics; this part is factored into a function:
# mean, max, min, std, skewness,
data_analyse(shdf['ratio'].values)

# Plot the histogram of the return distribution
import seaborn
plt.figure(figsize=(8, 3))
seaborn.distplot(shdf['ratio'].values, bins=50, kde=False)
plt.title("收益率分布直方图")
plt.savefig("plot_images/收益率分布直方图.png")
plt.show()
plt.close()

# Stationarity check: unit-root test (ADF)
from statsmodels.stats.diagnostic import unitroot_adf
# NOTE(review): the result of this call is neither stored nor printed, so
# the ADF outcome is lost when this runs as a script - confirm intent.
unitroot_adf(shdf['ratio'].values)

# Run the Ljung-Box test on the series.
import statsmodels as sm
# NOTE(review): the unpacking assumes acorr_ljungbox returns a
# (statistic, p-value) pair; its return type depends on the statsmodels
# version - confirm against the installed release.
Q, P = sm.stats.diagnostic.acorr_ljungbox(shdf['ratio'].values, lags=20)
# One row per lag 1..20 with its Q statistic and p-value.
box_test = pd.DataFrame({
    "Lags": np.arange(1, 21),
    "Q_statistic": Q,
    "p_value": P
})
print(box_test)
box_test.to_csv("intermediate/Ljung_box.csv")  # doctest: +SKIP