示例#1
0
def vector_check(y, x, n=5):
    """Print unit root, Granger causality and Engle-Granger diagnostics.

    Runs, in order: an ADF test on ``y`` and on ``x``, Granger causality
    tests on both column orderings with up to ``n`` lags, and an ADF test
    on the residuals of an OLS regression of ``y`` on a constant and ``x``.

    Args:
        y: Regressand series.
        x: Regressor series.
        n: Value passed as ``maxlag`` to the Granger causality test.

    Returns:
        None. Everything is reported through ``print``.
    """
    print('\nunit root test for regressand\n', adf(y))
    print('\nunit root test for regressor\n', adf(x))

    print('\ngranger causality test\n')
    # The return value of gct is discarded; presumably it prints its own
    # report -- confirm against the installed statsmodels version.
    for heading, columns in (('x to y', [y, x]), ('\ny to x', [x, y])):
        print(heading)
        gct(pd.concat(columns, axis=1), maxlag=n)

    print('\n\nEngle-Granger')
    # Step one: regress y on a constant and x; step two: unit root test on
    # the regression residuals.
    ols_fit = sm.OLS(y, sm.add_constant(x)).fit()
    print('\n', adf(ols_fit.resid))
示例#2
0
def ADF_test(residuals, output_log = False, title = "ADF Test Results"):
    """Plot the one-step change of a series and run an ADF test on it.

    Args:
        residuals: Series to test; it must support ``.shift()`` and
            subtraction (e.g. a pandas Series).
        output_log: When true, print the test statistic, p-value, number of
            lags used, number of observations and the critical values.
        title: Heading printed before the results when ``output_log`` is set.

    Returns:
        The tuple returned by ``adf`` for the differenced series.
    """
    # Previous value minus current value, i.e. the negated first difference.
    shifted = residuals.shift() - residuals
    # The first element has no predecessor, so it is NaN and is dropped.
    shifted.dropna(inplace=True)

    plt.plot(shifted, c='green')
    plt.show()

    # NOTE(review): newer statsmodels releases spell the no-constant option
    # 'n' instead of 'nc' -- confirm against the installed version.
    adf_value = adf(shifted, regression='nc')

    test_statistic = adf_value[0]
    pvalue = adf_value[1]
    usedlags = adf_value[2]
    nobs = adf_value[3]

    if output_log:
        # TODO: eventually render this on the figure instead of stdout.
        print(title)
        print("Test Statistic: %.4f\nP-Value: %.4f\nLags Used: %d\nObservations: %d" % (test_statistic, pvalue, usedlags, nobs))

        # adf_value[4] maps a significance level to its critical value.
        for crit in adf_value[4]:
            print(crit, adf_value[4][crit])

    return adf_value
示例#3
0
def SARIMAX(df, future, m=1, n=1, o=1, lag=12):
    """Fit a seasonal ARIMA model, show diagnostics and plot a forecast.

    Args:
        df: Series to model.
        future: Number of steps to forecast past the end of ``df``.
        m: First element of the non-seasonal ``order`` tuple.
        n: Second element of the non-seasonal ``order`` tuple.
        o: Third element of the non-seasonal ``order`` tuple.
        lag: Seasonal period; the seasonal order is ``(1, 1, 1, lag)``.

    Returns:
        None. Results are printed and plotted.
    """
    # ADF test on the first difference; the NaN that diff() leaves behind is
    # filled from the back-filled original series.
    first_difference = df.diff().fillna(df.bfill())
    print(adf(first_difference))

    # PACF on top, ACF below.
    corr_fig = plt.figure(figsize=(20, 20))
    pacf_axis = corr_fig.add_subplot(211)
    sm.graphics.tsa.plot_pacf(df, ax=pacf_axis)
    acf_axis = corr_fig.add_subplot(212)
    sm.graphics.tsa.plot_acf(df, ax=acf_axis)
    plt.show()

    model = sm.tsa.statespace.SARIMAX(
        df,
        order=(m, n, o),
        seasonal_order=(1, 1, 1, lag),
        enforce_stationarity=False,
        enforce_invertibility=False,
    )
    fitted = model.fit()

    fitted.plot_diagnostics(figsize=(20, 10))
    print(fitted.summary())

    forecast = fitted.get_forecast(steps=future)

    forecast_fig = plt.figure(figsize=(20, 10))
    axis = forecast_fig.add_subplot(111)
    axis.plot(forecast.predicted_mean, label='forecast', c=pick_a_color())
    axis.plot(fitted.predict(), label='fitted', c=pick_a_color())
    axis.plot(df, label='actual', c=pick_a_color())

    # Shade the band between the first and second columns of the forecast
    # confidence interval.
    bounds = forecast.conf_int()
    axis.fill_between(bounds.index,
                      bounds.iloc[:, 0],
                      bounds.iloc[:, 1],
                      alpha=.25, color=pick_a_color())
    plt.legend(loc='best')
    plt.title('%s steps ahead forecast' % (future))
    plt.show()
示例#4
0
def seasonality_check(df, freq='monthly'):
    """Show several views of the seasonality in a time series.

    Prints and plots, in order: a seasonal decomposition with an ADF test
    and ACF/PACF plots, a Hodrick-Prescott cycle/trend split, a differenced
    series, and a series adjusted by per-season weights.

    Args:
        df: pandas Series whose index exposes ``.month`` / ``.quarter``
            (e.g. a ``DatetimeIndex``). It is not modified.
        freq: ``'monthly'`` or ``'quarterly'`` select a seasonal period of
            12 or 4; ``'annual'`` is only recognised for the HP smoothing
            parameter. Any unrecognised value yields 0 for the
            corresponding setting.

    Returns:
        None. Results are printed and plotted.
    """
    # np.select returns a 0-d array (0 when nothing matches); int() turns it
    # into a plain integer for shift() and range().
    lag = int(np.select([freq == 'monthly', freq == 'quarterly'],
                        [12, 4]))

    print('ARIMA decomposition')

    df2 = sd(df, freq=lag)
    print(adf(df))
    sm.graphics.tsa.plot_acf(df)
    plt.show()
    sm.graphics.tsa.plot_pacf(df)
    plt.show()
    df.plot()
    plt.title('original')
    plt.show()
    df2.trend.plot(c=pick_a_color())
    plt.title('trend')
    plt.show()
    df2.seasonal.plot(c=pick_a_color())
    plt.title('seasonality')
    plt.show()
    df2.resid.plot(c=pick_a_color())
    plt.title('residual')
    plt.show()

    print('HP filter')
    # Smoothing parameter passed to hpfilter for each supported frequency.
    hplag = int(np.select(
        [freq == 'monthly', freq == 'quarterly', freq == 'annual'],
        [14400, 1600, 100]))
    cycle, trend = sm.tsa.filters.hpfilter(df, hplag)
    cycle.plot(c=pick_a_color())
    plt.title('cycle')
    plt.show()
    trend.plot(c=pick_a_color())
    plt.title('trend')
    plt.show()

    print('differential')
    # NOTE(review): a combined first + seasonal difference is usually
    # df.diff().diff(lag), i.e. it uses shift(lag) and shift(lag + 1); this
    # expression uses shift(lag - 1) and shift(lag). Left unchanged --
    # confirm the intended offset.
    df3 = df - df.shift(1) - (df.shift(lag - 1) - df.shift(lag))
    df3.plot(c=pick_a_color())
    plt.show()

    print('weighted')
    # Season label of every observation. Quarterly data has four seasons, so
    # bucket by quarter; keying the weights 1..4 but looking them up by
    # calendar month raised KeyError for months 5-12.
    season = df.index.quarter if lag == 4 else df.index.month
    overall_mean = np.mean(df)

    # Weight of a season = mean of that season / mean of the whole series.
    seasonal_weights = {}
    for i in range(1, lag + 1):
        seasonal_weights[i] = np.mean(df[season == i]) / overall_mean
        print(seasonal_weights[i])

    # Divide every observation by its season's weight. The division builds a
    # new object, so the caller's series is left untouched.
    factors = np.array([seasonal_weights[label] for label in season])
    df_adj = df / factors

    df_adj.plot(c=pick_a_color())
    plt.show()
示例#5
0
def draw_picture():
    """Print an ADF test result for each monitored parameter.

    For every parameter code the data is loaded with ``get_data`` and the
    ADF result of its ``'data_value'`` column is printed next to the
    parameter's display name. Despite the name, nothing is drawn here.

    Returns:
        Always 0.
    """
    # Parameter code -> display name.
    wdp_mode = {'W01': '水温', '060': '氨氮', 'W02': '溶解氧',
                '101': '总磷', 'W07': '高锰酸盐'}
    parameter_type = ['W01', '060', 'W02', '101', 'W07']

    # Load every data set before any test is run.
    data_set = {}
    for parameter in parameter_type:
        data_set[parameter] = get_data(parameter)

    for key in data_set:
        print(wdp_mode[key], adf(data_set[key]['data_value']))
        # Font settings so Chinese text and the minus sign render correctly.
        mpl.rcParams['font.sans-serif'] = ['SimHei']
        mpl.rcParams['axes.unicode_minus'] = False
    return 0
示例#6
0
 def run_test(self, num_timesteps_back, alpha=0.05):
     """Check every series in ``self.stats`` over its most recent window.

     For each series two checks are made on the last ``num_timesteps_back``
     points: the ADF p-value must be below ``alpha``, and the p-value of
     ``ttest`` between the first and second half of the window must be
     above ``alpha``. A check that raises ``ValueError`` counts as ``None``.

     Args:
         num_timesteps_back: Length of the trailing window.
         alpha: Significance level used by both checks.

     Returns:
         ``np.all`` over the per-series combined outcomes.
     """
     outcomes = []
     for _name, series in self.stats.items():
         try:
             window = series[-num_timesteps_back:]
             adf_ok = adf(window)[1] < alpha
         except ValueError:
             adf_ok = None

         try:
             first_half = series[int(-num_timesteps_back):
                                 int(-num_timesteps_back / 2)]
             second_half = series[int(-num_timesteps_back / 2):]
             ttest_ok = ttest(first_half, second_half)[1] > alpha
         except ValueError:
             ttest_ok = None

         outcomes.append(adf_ok and ttest_ok)
     return np.all(outcomes)
示例#7
0
# Histogram of the 'epsilon' column, drawn on axes with the top and right
# spines hidden.
ax = plt.figure(figsize=(10, 5)).add_subplot(111)
ax.spines['top'].set_visible(False)
ax.spines['right'].set_visible(False)

df['epsilon'].hist(histtype='bar', color='#ede574', width=0.007, bins=80)

plt.title('OLS vs Elastic Net', fontsize=15)
plt.xticks(fontsize=10)
plt.yticks(fontsize=10)
plt.grid(False)
plt.ylabel('Frequency')
plt.xlabel('Interval')
plt.show()

# Unit root test on the same column.
print(adf(df['epsilon']))

# Recorded unit root test result:
#(-2.4689818197725981, 0.12320492058022914, 2, 1286, {'1%': -3.4354451795550935, '5%': -2.863790090661305, '10%': -2.5679679660127368}, -6151.8371655225037)
# The p-value (0.123) is above 0.10, so the unit root null is not rejected:
# hence, it is not a stationary process.

# In[6]:

# The next step is to compare the mean and standard deviation of the
# residuals of the two approaches.
df['sk_residual'] = df['nok'] - df['sk_fit']
df['ols_residual'] = df['nok'] - df['ols_fit']

print(np.mean(df['sk_residual']) > np.mean(df['ols_residual']))
print(np.std(df['sk_residual']) > np.std(df['ols_residual']))

# The two prints above are boolean values.
# We have to use the Engle-Granger two-step method!
# A salute to Engle, mentor of my mentor Gallo,
# and Nobel prize winner.

# I am not going to explain much here.
# If you have checked my other code, you should know;
# details are in the pair trading script:
# https://github.com/je-suis-tm/quant-trading/blob/master/Pair%20trading%20backtest.py

# Regressor: 'eur' restricted to rows whose index is before 2017-04-25,
# plus a constant column. The regressand y is defined outside this excerpt.
x2=df['eur'][df.index<'2017-04-25']
x3=sm.add_constant(x2)

model=sm.OLS(y,x3).fit()
ero=model.resid

# Unit root test on the regression residuals, then the regression summary.
print(adf(ero))
print(model.summary())

# Recorded output:
#(-2.5593457642922992, 0.10169409761939013, 0, 1030, 
#{'1%': -3.4367147300588341, '5%': -2.8643501440982058, '10%': -2.5682662399849185}, -1904.8360920752475)
#0.731199409071
# Unfortunately, the residual does not even reach the 90% confidence level:
# the statistic (-2.559) is above the 10% critical value (-2.568),
# so we cannot conclude any cointegration from the test.
# Still, from the visualization
# we can tell nok and eur are somewhat correlated;
# our R-squared suggests the euro explains about 73% of nok.

# In[14]:

plt.rcParams['font.sans-serif'] = ['SimHei']  # so Chinese characters render correctly
plt.rcParams['axes.unicode_minus'] = False  # so the minus sign renders correctly
arima_data.plot()
plt.show()

# In[57]:

# Autocorrelation plot of the raw series
from statsmodels.graphics.tsaplots import plot_acf
plot_acf(arima_data)

# In[58]:

# Unit root (ADF) test on the sales column
from statsmodels.tsa.stattools import adfuller as adf
print(adf(arima_data[u'销量']))

# In[59]:

# First difference of the data; the NaN row created by diff() is dropped
# and the single column is renamed.
D_arima_data = arima_data.diff().dropna()
D_arima_data.columns = [u'时间差分']
D_arima_data.plot()

# In[60]:

# Autocorrelation plot of the differenced series
plot_acf(D_arima_data)

# In[64]:

from statsmodels.graphics.tsaplots import plot_pacf
plot_pacf(D_arima_data)  # partial autocorrelation plot
示例#10
0
        diff.append(value)
    return Series(diff)


# Revert a value from the differenced ('deseasonalized') scale back to the original scale
def inverse_diff(history, yhat, interval=12):
    """Add back the observation ``interval`` steps from the end of history.

    Args:
        history: Sequence of past observations on the original scale.
        yhat: Value on the differenced scale.
        interval: How far back from the end of ``history`` to look.

    Returns:
        ``yhat`` plus ``history[-interval]``.
    """
    reference = history[-interval]
    return yhat + reference


# Determine initial p, d, q values for ARIMA

# Difference the raw series X with the diff() helper.
station = diff(X)

# Check whether the differenced series is stationary

# result[0] is the ADF statistic, result[1] the p-value and result[4] a
# mapping from significance level to critical value.
result = adf(station)
print('ADF Statistic: %f' % result[0])
print('p-value: %f' % result[1])
print('Critical Values:')
for percent, value in result[4].items():
    print('\t%s: %.3f' % (percent, value))

# The p-value is smaller than 1%: the dataset is stationary and the null hypothesis can be rejected.
# d will start with a value of 0.
# Determine p and q values by plotting the ACF and PACF.
# The distribution is not Gaussian, so the ACF may be of limited use.

# Upper panel: ACF of the differenced series. The lower panel is selected
# next; whatever is drawn into it is not visible in this excerpt.
plt.figure()
plt.subplot(211)
plot_acf(station, ax=plt.gca())
plt.subplot(212)
# Load the workbook and index the rows by the parsed 'Date' column.
df = pd.read_excel('timeseries.xlsx')
df.index = pd.to_datetime(df['Date'])
df['Person Rate'].plot()

import statsmodels.api as sm
import matplotlib.pyplot as plt
# ACF (top) and PACF (bottom) of 'Person Rate' for the first 16 lags.
fig = plt.figure(figsize=(12,8))
ax1 = fig.add_subplot(211)
fig = sm.graphics.tsa.plot_acf(df['Person Rate'],lags = 16, ax=ax1)
ax2 = fig.add_subplot(212)
fig = sm.graphics.tsa.plot_pacf(df['Person Rate'], lags = 16, ax=ax2)
plt.show()

# Unit root (ADF) test on 'Call Rate'.
from statsmodels.tsa.stattools import adfuller as adf
x = df['Call Rate']
result = adf(x)
print('ADF Statistic: %f' % result[0])
print('p-value: %f' % result[1])
print('Critical Values:')
# The header used to be printed with nothing after it; list the critical
# values (result[4] maps significance level -> critical value).
for percent, value in result[4].items():
    print('\t%s: %.3f' % (percent, value))

# ARIMA(0, 1, 6) on 'Call Rate'. The original call read
# `model.fit(start_params=)`, which is a syntax error; fit with the
# library's default starting parameters instead.
model=sm.tsa.ARIMA(endog=df['Call Rate'],order=(0,1,6))
results=model.fit()
print(results.summary())

def test_stationarity(timeseries):
    
    #Determing Rolling Statistics
    rolmean = pd.rolling_mean(timeseries, window=12)
    rolstd = pd.rolling_std(timeseries, window=12)
    
    #plot rolling statistics
    # Values of column `stock2` from the training frame as a plain list.
    # X is built before this excerpt -- presumably the same for the other
    # stock of the pair; verify against the preceding code.
    Y = list(training_df.loc[:, stock2])

    # Use TLS to determine the hedge ratio
    # (orthogonal distance regression of Y on X using model function f).
    linear = odrpack.Model(f)
    mydata = odrpack.RealData(X, Y)
    myodr = odrpack.ODR(mydata, linear, beta0=[1., 2.])
    myoutput = myodr.run()

    # beta[0] is used as the slope and beta[1] as the intercept; this
    # assumes f is defined as beta[0] * x + beta[1] -- TODO confirm.
    intercept = myoutput.beta[1]
    slope = myoutput.beta[0]

    # Residual of every observation from the fitted line.
    residual = []
    for i in range(len(X)):
        residual.append(Y[i] - slope * X[i] - intercept)

    # Unit root test on the residuals.
    # NOTE(review): despite its name, AIC_test_value holds result[0], the
    # ADF test statistic, not an AIC value. p_value is not used in the
    # statements shown here.
    result = adf(residual)
    AIC_test_value = result[0]
    p_value = result[1]

    # Record the residual standard deviation per pair and in a flat list.
    vol_dict[pairs] = np.std(residual)
    vol_list.append(np.std(residual))

    pairs_testvalue_dict[pairs] = AIC_test_value

# Pairs ordered by their stored test value, smallest (most negative) first.
sorted_by_value = sorted(pairs_testvalue_dict.items(),
                         key=lambda item: item[1])
# Pairs ordered by residual volatility, largest first.
sorted_by_vol = sorted(vol_dict.items(),
                       key=lambda item: item[1],
                       reverse=True)

Top_pairs = []
print(max(vol_list))
# Mean and standard deviation of each variance sample, scaled by 100.
mean_var_Y = np.mean(var_Y) * 100
error_mean_var_Y = np.std(var_Y) * 100
mean_var_C = np.mean(var_C) * 100
error_mean_var_C = np.std(var_C) * 100
mean_var_I = np.mean(var_I) * 100
error_mean_var_I = np.std(var_I) * 100

# Mean variances expressed relative to Y's; the Y entry is mean_var_Y
# divided by itself.
relative_variance_Y = mean_var_Y / mean_var_Y
relative_variance_C = mean_var_C / mean_var_Y
relative_variance_I = mean_var_I / mean_var_Y

# ADF test result for every column of Y, and for the same column label in
# C and I.
adf_Y = []
adf_C = []
adf_I = []
for column in Y:
    adf_y = adf(Y[column])
    adf_c = adf(C[column])
    adf_i = adf(I[column])
    adf_Y.append(adf_y)
    adf_C.append(adf_c)
    adf_I.append(adf_i)
# Empty containers, presumably filled per confidence level (99/95/90) by
# code that is not visible in this excerpt.
adf_Y_99 = []
adf_C_99 = []
adf_I_99 = []
adf_Y_95 = []
adf_C_95 = []
adf_I_95 = []
adf_Y_90 = []
adf_C_90 = []
adf_I_90 = []
示例#14
0
def _test_adf_threshold(spp, num_timesteps_back, alpha=0.05):
    """Tell whether the ADF p-value of the recent ``spp.Nt`` is below alpha.

    Args:
        spp: Object exposing a sliceable ``Nt`` sequence.
        num_timesteps_back: Number of trailing points of ``spp.Nt`` to test.
        alpha: Significance threshold for the p-value.

    Returns:
        Result of ``p_value < alpha``.
    """
    recent = spp.Nt[-num_timesteps_back:]
    p_value = adf(recent)[1]
    return p_value < alpha
示例#15
0
                # Start a NaN-filled column for this pair in every result frame.
                pADF[name] = np.nan
                pHypo[name] = np.nan
                pBeta0[name] = np.nan
                pBeta1[name] = np.nan
                print(name)
                # Slide a 244-row window over the data; the result of each
                # window is stored at position k + 244.
                for k in range(len(stock) - 244):
                    # Only fit windows in which neither series has a missing value.
                    if stock[[i, j]][k:k + 244].isna().sum().sum() == 0:
                        mdl = VECM(stock[[i, j]][k:k + 244],
                                   coint_rank=1,
                                   deterministic='co')
                        res = mdl.fit()
                        # Linear combination of the two series weighted by
                        # the fitted beta coefficients.
                        x = (res.beta[0] * stock[i][k:k + 244] +
                             res.beta[1] * stock[j][k:k + 244])
                        # NOTE(review): these are chained assignments
                        # (frame[col][row] = value); under pandas
                        # copy-on-write they may not write through to the
                        # frame -- confirm with the pandas version in use.
                        pBeta0[name][k + 244] = res.beta[0]
                        pBeta1[name][k + 244] = res.beta[1]
                        # x is built from 244-row slices, so x[:244] selects
                        # all of it. [0] is the ADF test statistic.
                        c = adf(x[:244], regression='c')[0]
                        pADF[name][k + 244] = c
                        # Hard-coded threshold; presumably the 5% ADF
                        # critical value for this window length -- TODO confirm.
                        pHypo[name][k + 244] = c <= -2.8741898504150574
# Remove the 'TEMP' column from every frame, in place.
for _frame in (pADF, pHypo, pBeta0, pBeta1, stock):
    _frame.drop(['TEMP'], axis=1, inplace=True)

# Rank the ADF statistics across each row, multiply by pHypo, and replace
# every resulting zero with the sentinel 999.
pRank = pADF.rank(axis=1, method='min') * pHypo
pRank[pRank == 0] = 999

# Persist the result frames.
for _frame, _path in ((pADF, 'pADF.csv'),
                      (pHypo, 'pHypo.csv'),
                      (pBeta0, 'pBeta0.csv')):
    _frame.to_csv(_path)
示例#16
0
 def test_stationarity(self, coin1, coin2, beta):
     """Tell whether the spread ``coin2 - beta * coin1`` passes the ADF test.

     Args:
         coin1: First series.
         coin2: Second series.
         beta: Multiplier applied to ``coin1`` before subtracting.

     Returns:
         True when the ADF p-value of the spread is below
         ``self.significance``, otherwise False.
     """
     spread = coin2 - beta * coin1
     p_value = adf(spread)[1]
     return bool(p_value < self.significance)