示例#1
0
    def stationary_test(self, test_df, plot=False):
        """Compute ACF, PACF and ADF diagnostics for the 'return' column.

        The first entry of ``test_df['return']`` is left out before any
        statistic is computed.

        :param test_df: table-like object providing a 'return' column
        :param plot: when true, draw the ACF and PACF (10 lags each) stacked
            in one white-background figure and show it
        :return: dict with the keys 'acf', 'pacf' and 'adf'
        """
        returns = test_df['return'][1::]

        report = {
            'acf': stattools.acf(returns, nlags=10),
            'pacf': stattools.pacf(returns, nlags=10),
            'adf': unitroot_adf(returns),
        }

        if not plot:
            return report

        # Upper panel: ACF, lower panel: PACF.
        figure = plt.figure(facecolor='white')
        for position, draw in ((211, plot_acf), (212, plot_pacf)):
            draw(returns, lags=10, ax=figure.add_subplot(position))
        plt.show()

        return report
示例#2
0
 def baseline(self):
     """Print the ADF p-value of the latest 200 log returns of SH000300.

     Reads 'trade_date' and 'close_price' for 'SH000300' through
     ``self.mysql.select_values``, indexes the rows by trade date, derives
     the log of the day-over-day close-price ratio, plots ACF/PACF of the
     last 200 values with ``acf_pacf_plot`` and prints element 1 of the
     ADF result (the p-value).

     :return: None; the result is only printed
     """
     import numpy as np
     import pandas as pd
     from dev_global.env import TIME_FMT
     # `adfuller` is the function the old `unitroot_adf` alias in
     # statsmodels.stats.diagnostic wrapped; the alias is deprecated and
     # missing from recent statsmodels releases.
     from statsmodels.tsa.stattools import adfuller

     df = self.mysql.select_values('SH000300', 'trade_date,close_price')
     # data cleaning
     df.columns = ['trade_date', 'close_price']
     # pd.to_datetime returns a new Series instead of converting in place,
     # so the result has to be assigned back before it becomes the index.
     df['trade_date'] = pd.to_datetime(df['trade_date'], format=TIME_FMT)
     df.set_index('trade_date', inplace=True)
     # data constructing
     df['shift'] = df['close_price'].shift(1)
     df['amplitude'] = df['close_price'] / df['shift']
     df['ln_amplitude'] = np.log(df['amplitude'])
     # The first row has no previous close, hence NaN; drop it.
     df.dropna(inplace=True)
     # plot
     input_df = df['ln_amplitude'][-200:]
     acf_pacf_plot(input_df, lags=40)
     result = adfuller(input_df)
     print(result[1])
示例#3
0
def adf_(timeseries):  # ADF stationarity test
    """Run the ADF unit-root test and return its statistic and 10% critical value.

    :param timeseries: time series to analyse
    :return: tuple ``(adf_statistic, critical_value_10pct)``; comparing the
        two indicates whether the series can be treated as stationary
    """
    adf_test = unitroot_adf(timeseries)
    # Element 0 is the test statistic; element 4 maps significance levels
    # ('10%' among them) to critical values, so it can be read directly
    # without a DataFrame round trip.
    return adf_test[0], adf_test[4]['10%']
示例#4
0
    def seasonality_price_decomp(self, test_df, f=0, plot=False):
        """Additively decompose the 'close' column and ADF-test the returns.

        :param test_df: table-like object with 'close', 'timestamp' and
            'return' columns
        :param f: value forwarded to ``seasonal_decompose`` as ``freq``;
            must not be 0
        :param plot: when true, plot the observed series and the three
            components in four stacked panels, then print the ADF statistic
            and critical values
        :return: dict with the keys 'trend', 'seasonal', 'residual', 'adf'
        :raises ValueError: if ``f`` is 0, or if the first two 'timestamp'
            entries differ by exactly 60
        """
        if f == 0:
            raise ValueError('\nError freqency input!! \n')

        obv = test_df['close']

        stamps = test_df['timestamp']
        if stamps[1] - stamps[0] == 60:
            raise ValueError('\n Wrong using minute data! \n')

        parts = seasonal_decompose(obv, freq=f, model='additive')
        trend, seasonal, residual = parts.trend, parts.seasonal, parts.resid

        # Original note: 'return'[0] is NaN, so it is excluded from the test.
        ADF = unitroot_adf(test_df['return'][1::])

        if plot:
            plt.figure(figsize=(15, 10))
            panels = (
                (obv, 'obv'),
                (trend, 'trend'),
                (seasonal, 'seasonal'),
                (residual, 'residual'),
            )
            # One row per series in a 4x1 grid.
            for row, (series, label) in enumerate(panels, start=1):
                plt.subplot(4, 1, row)
                plt.plot(series, label=label, lw=0.7)
                plt.legend(loc='best')
            plt.show()

            print('stats =', ADF[0], 'Alpha =', ADF[4])
            if ADF[0] < ADF[4]['1%']:
                print('\nResidual is stable in 99% confid. interval')

        return {
            'trend': trend,
            'seasonal': seasonal,
            'residual': residual,
            'adf': ADF,
        }
示例#5
0
## Stationarity checks
# Method 1: time series plot
fig, ax = plt.subplots()
fig.set_size_inches(9, 3.5)
ax.plot(df['Date'], df['Close'])
# Place a major x tick at every multiple of 90 to keep the axis readable.
ax.xaxis.set_major_locator(matplotlib.ticker.MultipleLocator(90))
plt.xticks(rotation=90)
ax.tick_params(labelsize=7)
plt.show()

# Method 2: calculate and plot ACF and PACF
n_lag = 100  # number of lags passed to the ACF/PACF calls below
acf = stattools.acf(df['Close'], nlags=n_lag)  # autocorrelation coefficients
pacf = stattools.pacf(df['Close'], nlags=n_lag)  # partial autocorrelation coefficients
print('Autocorrelation Coefficient (ACF): \n{}'.format(acf))
print('Partial Autocorrelation Coefficient (PACF): \n{}'.format(pacf))

sm.graphics.tsa.plot_acf(df['Close'], lags=n_lag)
plt.show()  # The shaded area is the confidence interval; by default it is set to 95%.

sm.graphics.tsa.plot_pacf(df['Close'], lags=n_lag)
plt.show()

# Unit-root test (ADF here, run in two different ways; the second one
# prints a nicer summary)
from statsmodels.stats.diagnostic import unitroot_adf
adf_method1 = unitroot_adf(df['Close'])
print('ADF method 1: \n{}'.format(adf_method1))

from arch.unitroot import ADF
adf_method2 = ADF(df['Close'])
print('ADF method 2: \n{}'.format(adf_method2))
示例#6
0
# Plot `means` against s.index twice: once with `sigmas` as error bars and
# once as a thick green line.
# NOTE(review): `s`, `means`, `sigmas`, `date_range`, `days` and `score` are
# defined outside the visible lines.
plt.figure(figsize=(8, 5))
plt.errorbar(s.index, means, yerr=sigmas, alpha=0.5)
plt.plot(s.index, means, 'g', linewidth=4)

plt.show()


# Stationarity check
# Build a two-column frame: 'date' (date_range from position `days` on)
# next to 'score'.
df = pd.concat([pd.DataFrame(date_range[days:],columns=['date']),
                pd.DataFrame(score,columns=['score'])],
                axis=1)

from statsmodels.tsa import stattools
from statsmodels.stats.diagnostic import unitroot_adf

print(unitroot_adf(df.score))

#plt.stem(stattools.acf(df.score));
k = stattools.pacf(df.score)
# NOTE(review): the lag-1 PACF value is overwritten with a constant before
# plotting; the reason is not visible here -- confirm this is intended.
k[1] = 0.003
plt.stem(k);

# Peaks and troughs (only their locations are computed in the visible lines)
data = means.reshape((len(means),))
# The sign of the first difference is +1 while rising and -1 while falling;
# a second difference of -2 therefore marks a rise followed by a fall.
doublediff = np.diff(np.sign(np.diff(data)))
# +1 moves the index from the difference array onto the peak sample itself.
peak_locations = np.where(doublediff == -2)[0] + 1
# Drop the last detected peak.
peak_locations = peak_locations[:(len(peak_locations)-1)]

# Same detection on the negated series: its peaks are the troughs of `data`.
doublediff2 = np.diff(np.sign(np.diff(-1*data)))
trough_locations = np.where(doublediff2 == -2)[0] + 1
        # For every code in `codelist`: load its workbook, write a profiling
        # report, then ADF-test and plot each remaining column.
        # NOTE(review): `i`, `c`, `stage`, `para` and `codelist` come from an
        # enclosing scope that is not visible in this fragment.
        for j in codelist:
            datas_filename = para.path_data + '%s' % j + '_' + '%s' % i + '.xlsx'
            datas = pd.read_excel(datas_filename, index_col=0, parse_dates=True)
            # NOTE(review): the signal-filtered frame built on the next line is
            # overwritten immediately by the line after it, so the `signal == c`
            # filter has no effect -- confirm which of the two is intended.
            datas_minus_signal = pd.DataFrame(datas[datas.signal == c],columns = datas.columns)
            datas_minus_signal = pd.DataFrame(datas.iloc[:,2:])
            # step 1: descriptive-statistics analysis report
            profile = datas_minus_signal.profile_report(title='%s' % j + '_' + '%s' % i + '_' + '%s' % stage+ ' Exploratory Data Analysis')
            profile.to_file(
                output_file=para.path_results + '%s' % j + '_' + '%s' % i + '_' + '%s' % stage+ 'Exploratory Data Analysis.html')

            # NOTE(review): `period` and `dict_roll` are not used in the lines
            # visible here.
            period = 15
            dict_roll = {}
            # Column 0 is the reference series plotted as `y` below; columns
            # 1.. are tested and plotted one at a time.
            for k in range(1, datas_minus_signal.columns.size):
                columns_name = datas_minus_signal.columns[k]
                # step 2: time-series test: unit-root test, on the column
                # itself and on its first difference
                adf_result = unitroot_adf(datas_minus_signal.iloc[:, k].dropna())
                adf_result_diff = unitroot_adf(datas_minus_signal.iloc[:, k].diff().dropna())
                # Print the test statistic next to the 5% critical value.
                print('时间序列检验:单位根检验: %s' % j + '_%s' % i+ '_' + '%s' % stage, columns_name,
                      round(adf_result[0], 4), round(adf_result[4]['5%'], 4))
                print('时间序列检验:单位根检验: %s' % j + '_%s' % i+ '_' + '%s' % stage, columns_name, '同比',
                      round(adf_result_diff[0], 4), round(adf_result_diff[4]['5%'], 4))
                # step 3: plotting -- min-max scale both series to [0, 1] so
                # they share one axis
                y = pd.DataFrame(datas_minus_signal.iloc[:, 0]).apply(
                    lambda x: (x - np.min(x)) / (np.max(x) - np.min(x)))
                X = pd.DataFrame(datas_minus_signal.iloc[:, k]).apply(
                    lambda x: (x - np.min(x)) / (np.max(x) - np.min(x)))
                # NOTE(review): figure_count is reset to 1 on every iteration,
                # so plt.figure(1) is selected each time and the increment has
                # no lasting effect within the visible lines.
                figure_count = 1
                plt.figure(figure_count)
                figure_count += 1
                plt.plot(y, 'k-', label='%s' % j+ '_' + '%s' % stage)
                plt.plot(X, 'b-', label='%s' % i + '_' + '%s' % stage+ '%s' % columns_name)
示例#8
0
    # Finish the return-series figure: title it, save it, show it.
    # NOTE(review): the figure and `shdf` are created above the visible lines.
    plt.title("上证指数的收益率序列")
    plt.savefig("plot_images/上证收益率序列.png")
    plt.show()

    # Summary statistics via data_analyse (per the original note: mean, max,
    # min, std, skewness)
    data_analyse(shdf['ratio'].values)

    # Histogram of the return distribution
    import seaborn
    plt.figure(figsize=(8, 3))
    seaborn.distplot(shdf['ratio'].values, bins=50, kde=False)
    plt.title("收益率分布直方图")
    plt.savefig("plot_images/收益率分布直方图.png")
    plt.show()
    plt.close()

    # Stationarity check: ADF unit-root test
    # NOTE(review): the result of this call is discarded (neither stored nor
    # printed) -- confirm whether it should be reported.
    from statsmodels.stats.diagnostic import unitroot_adf
    unitroot_adf(shdf['ratio'].values)

    # Ljung-Box test on the series
    # NOTE(review): `sm.stats.diagnostic` is reachable here presumably because
    # the from-import above already loaded that submodule; a bare
    # `import statsmodels` alone may not expose it -- confirm.
    import statsmodels as sm
    # NOTE(review): this unpacks the result into two values (Q statistics and
    # p-values); newer statsmodels releases may return a DataFrame instead --
    # confirm against the installed version.
    Q, P = sm.stats.diagnostic.acorr_ljungbox(shdf['ratio'].values, lags=20)
    # One row per lag, 1 through 20.
    box_test = pd.DataFrame({
        "Lags": np.arange(1, 21),
        "Q_statistic": Q,
        "p_value": P
    })
    print(box_test)
    box_test.to_csv("intermediate/Ljung_box.csv")  # doctest: +SKIP