Exemplo n.º 1
1
 def setup_class(cls):
     """Run the reference ADF fit on ``cls.y`` and record expected values.

     The fit uses regression "nc", a fixed ``maxlag`` of 1 and no automatic
     lag selection. It is run twice: once for the plain result tuple and
     once with ``store=True`` to keep the returned store object.
     """
     adf_options = dict(regression="nc", autolag=None, maxlag=1)
     cls.res1 = adfuller(cls.y, **adf_options)
     cls.teststat = -2.4511596
     # Stata does not return a p-value for noconstant;
     # this value is just taken from our results.
     cls.pvalue = 0.013747
     cls.critvalues = [-2.587, -1.950, -1.617]
     # Only the fourth element of the store=True result is kept.
     _, _, _, cls.store = adfuller(cls.y, store=True, **adf_options)
def diff_nonstationary(x, alpha):
    """Return how many differencing passes ``x`` needs to pass the ADF test.

    The first test includes a trend term ('ct') when a linear regression of
    ``x`` on 1..len(x) has a p-value below ``alpha``; otherwise it uses a
    constant only ('c'). Every later test, run on the differenced series
    with NaNs dropped, uses 'c'.

    PARAMETERS:
    1) x - input time series (must provide ``.diff()``; e.g. a pandas Series)
    2) alpha - significance level

    RETURNS:
    int - number of differentiations applied; 0 (zero) means there is no
    need to differentiate.
    """
    time_index = pd.Series(range(1, len(x) + 1))
    trend_is_significant = stats.linregress(time_index, x).pvalue < alpha
    pvalue = adfuller(x, regression='ct' if trend_is_significant else 'c')[1]

    n_diffs = 0
    # Keep differencing while the ADF p-value stays above alpha.
    while pvalue > alpha:
        x = x.diff()
        pvalue = adfuller(x.dropna(), regression='c')[1]
        n_diffs += 1
    return int(n_diffs)
    
### End of code
def cluster_vs_meta_granger_TM(c,X,M,Ml,lags=7,thresh=0.05):
	"""Wald-test each series in ``M`` against the summed cluster series.

	Parameters: ``c`` indexes into ``X`` (the selection is summed over
	axis 0); ``M`` is iterated row by row and ``Ml[j]`` is the label
	reported for row ``j``; ``lags`` is the base lag count and the
	``maxlag`` passed to adfuller; ``thresh`` is the p-value cut-off.

	Returns ``(m, R)``: ``m`` is the value from the LAST iteration over
	``M`` and ``R`` is the sorted list of ``(pvalue, label)`` pairs whose
	Wald p-value is below ``thresh``.

	NOTE(review): ``m`` is only assigned inside the loop, so an empty
	``M`` raises NameError at the return statement.
	"""
	# use the Toda Yamamoto method (environmental data is stationary, but clusters are not)
	x1 = X[c].sum(0)
	adf = stattools.adfuller(x1,maxlag=lags)
	# Statistic above the 5% critical value: unit root not rejected, so add
	# adf[2] extra lags (statsmodels documents index 2 as the lags used).
	if (adf[0] > adf[4]['5%']):
		m1 = adf[2]
	else:
		m1 = 0
	R = []
	for j,x2 in enumerate(M):
		# Keep only positions where x2 is finite, in both series.
		have_values = np.isfinite(x2)
		xi = x1[have_values]
		x2i = x2[have_values]
		adf = stattools.adfuller(x2i,maxlag=lags)
		if (adf[0] > adf[4]['5%']):
			m2 = adf[2]
		else:
			m2 = 0
		m = max(m1,m2)
		# Regressor rows: m1+lags shifted slices of xi followed by m2+lags
		# shifted slices of x2i, trimmed so all slices have the same length.
		y = [xi[i+max(0,m2-m1):len(xi)+i-(m1+lags)] for i in range(m1+lags)] + [x2i[i+max(0,m1-m2):len(xi)+i-(m2+lags)] for i in range(m2+lags)]
		y = np.array(y).T
		# No constant column is added to the design matrix here.
		lm = linear_model.OLS(xi[max(m1,m2)+lags:],y)
		result = lm.fit()
		# Restriction rows select the coefficients from column m+lags onward.
		# NOTE(review): verify this matches the intended set of x2 lag
		# columns when m1 != m2.
		Restr = np.eye(y.shape[1])[m+lags:]
		wald = result.wald_test(Restr)
		if wald.pvalue < thresh:
			R.append((wald.pvalue,Ml[j]))
	return m,sorted(R)
Exemplo n.º 4
0
 def __init__(self):
     """Fit the reference ADF on ``self.y`` and record the expected values.

     Uses regression "nc", ``maxlag=1`` and no automatic lag selection; a
     second call with ``store=True`` keeps the returned store object.
     """
     adf_options = dict(regression="nc", autolag=None, maxlag=1)
     self.res1 = adfuller(self.y, **adf_options)
     self.teststat = -2.4511596
     # Stata does not return a p-value for noconstant;
     # this value is just taken from our results.
     self.pvalue = 0.013747
     self.critvalues = [-2.587, -1.950, -1.617]
     # Only the fourth element of the store=True result is kept.
     _, _, _, self.store = adfuller(self.y, store=True, **adf_options)
Exemplo n.º 5
0
    def ADF(self, v, crit='5%', max_d=6, reg='nc', autolag='AIC'):
        """ Augmented Dickey Fuller test

        Parameters
        ----------
        v: ndarray
            residuals; a 2-D matrix is tested column by column, while input
            without a second dimension is tested as one series
        crit: str
            key of the critical value to compare against, e.g. '5%'
        max_d: int
            maximum lag passed to ``adfuller``
        reg: str
            regression type passed to ``adfuller``
        autolag: str
            lag selection method passed to ``adfuller``

        Returns
        -------
        bool: boolean
            true if v pass the test, i.e. every tested series has a
            statistic strictly below the chosen critical value
        """

        def passes(series):
            adf = adfuller(series, max_d, reg, autolag)
            return bool(adf[0] < adf[4][crit])

        # Only the shape probe is guarded: errors raised by adfuller itself
        # (or a bad ``crit`` key) must surface instead of being retried on
        # the whole matrix.
        try:
            n_cols = v.shape[1]
        except (AttributeError, IndexError):
            return passes(v)

        # all() stops at the first failing column, like the original break.
        return all(passes(v[:, j]) for j in range(n_cols))
Exemplo n.º 6
0
def test_adfuller_short_series(reset_randomstate):
    """Check adfuller on very short inputs.

    Seven observations must yield a stored ``maxlag`` of 1; two
    observations (default regression) and three observations (regression
    'ct') must raise ValueError mentioning the short sample size.
    """
    short = np.random.standard_normal(7)
    store = adfuller(short, store=True)[-1]
    assert store.maxlag == 1

    too_short = np.random.standard_normal(2)
    with pytest.raises(ValueError, match='sample size is too short'):
        adfuller(too_short)

    too_short_for_trend = np.random.standard_normal(3)
    with pytest.raises(ValueError, match='sample size is too short'):
        adfuller(too_short_for_trend, regression='ct')
Exemplo n.º 7
0
def testADFTest():
    """Print the ADF and Jarque-Bera results for 100 standard-normal draws.

    Uses single-argument ``print(...)`` calls so the function parses on
    Python 3 and prints the same text on Python 2.
    """
    import statsmodels.tsa.stattools as sts
    import statsmodels.stats.stattools as sss
    import numpy as np
    data = np.random.randn(100)
    #http://statsmodels.sourceforge.net/stable/generated/statsmodels.tsa.stattools.adfuller.html
    print(sts.adfuller(data))

    #http://statsmodels.sourceforge.net/stable/generated/statsmodels.stats.stattools.jarque_bera.html
    print(sss.jarque_bera(data))
Exemplo n.º 8
0
def adftest(y, short_flag):
    '''Augmented Dickey-Fuller test for the given time series.

    The test is run with four regression specifications ('c', 'ct', 'ctt',
    'nc'). With ``short_flag`` true, one verdict per specification is
    printed: the series is reported "nonstationary" when the test statistic
    is above the 5% critical value and "stationary" otherwise. With
    ``short_flag`` false, the full result tuple is printed for each
    specification instead.
    '''
    sep = 32 * '--'
    # (regression, short-format line, long-format line)
    specs = [
        ('c',
         "- constant only:\t\t\t\t{}",
         "- constant only\n{}"),
        ('ct',
         "- constant and trend:\t\t\t\t{}",
         "- constant and trend\n{}"),
        ('ctt',
         "- constant, and linear and quadratic trend:\t{}",
         "- constant, and linear and quadratic trend\n{}"),
        ('nc',
         "\n- no constant, no trend:\t\t\t{}",
         "\n- no constant, no trend\n{}"),
    ]

    # Single-argument print(...) behaves the same on Python 2 and 3.
    print("\n\t\tAugmented Dicky-Fuller test\n")
    if short_flag:
        # Run every test before printing, so a failing test prints no table.
        verdicts = []
        for regression, short_fmt, _ in specs:
            test = adfuller(y, regression=regression)
            nonstationary = test[0] > test[4]['5%']
            verdicts.append(
                short_fmt.format("nonstationary" if nonstationary else "stationary"))
        print(sep)
        for line in verdicts:
            print(line)
        print(sep)
    else:
        for regression, _, long_fmt in specs:
            print(long_fmt.format(adfuller(y, regression=regression)))
        print(sep)
Exemplo n.º 9
0
 def summarize_all(self):
     """Print a text summary of the fitted regression in ``self.result``.

     Output: a centred "<dependent> vs <independent(s)>" title, the model
     equation, R squared, the confidence interval of each slope, the
     standard deviation of the residuals, an ADF-based mean-reversion
     verdict ('likely' when the residual ADF statistic is at or below the
     5% critical value) and the residual half life.

     With exactly one independent variable the slope is read by position
     (``params[1]`` and row 1 of ``conf_int()``); with several, slopes and
     intervals are looked up by variable name. ``params[0]`` is printed as
     the intercept in both cases.
     """
     dependent = self.dependent
     result = self.result
     params = result.params
     intercept = params[0]
     conf = result.conf_int()
     cadf = adfuller(result.resid)
     mean_reverting = 'likely' if cadf[0] <= cadf[4]['5%'] else 'unlikely'

     if len(self.independent) == 1:
         name = self.independent[0]
         title = "{} vs {}".format(dependent.upper(), name.upper())
         rhs = "%.4f * %s" % (params[1], name)
         intervals = "[%.4f, %.4f]" % (conf.iloc[1, 0], conf.iloc[1, 1])
     else:
         names = self.independent  # list
         title = "{} vs {}".format(dependent.upper(), (', '.join(names)).upper())
         rhs = ' + '.join("%.4f * %s" % (params[n], n) for n in names)
         intervals = ' , '.join(
             "[%.4f, %.4f]" % (conf.loc[n, 0], conf.loc[n, 1]) for n in names)

     # print("") emits a blank line on both Python 2 and 3; a bare
     # ``print`` is a no-op expression on Python 3.
     print("")
     print("{:^40}".format(title))
     print("%20s %s = %s + %.4f" % ("Model:", dependent, rhs, intercept))
     print("%20s %.4f" % ("R square:", result.rsquared))
     print("%20s %s" % ("Confidence interval:", intervals))
     print("%20s %.4f" % ("Model error:", result.resid.std()))
     print("%20s %s" % ("Mean reverting:", mean_reverting))
     print("%20s %d" % ("Half life:", half_life(result.resid)))
def _adsf_score(series, times, window_length):
    """Return the ADF statistic of ``series[t:t+window_length]`` for each ``t`` in ``times``."""
    return [
        ts.adfuller(series[start:start + window_length])[0]
        for start in times
    ]
Exemplo n.º 11
0
def dickeyfuller_fcn(data,maxlag):
    """Return the ADF p-value of ``data``, or NaN when the test fails.

    @FORMAT: data = np(values)

    ``maxlag`` is passed to ``adfuller`` as its second positional argument.
    Any ordinary exception raised by the test yields ``np.nan``.
    KeyboardInterrupt and SystemExit are deliberately not swallowed.
    """
    try:
        return adfuller(data, maxlag)[1]
    except Exception:
        return np.nan
Exemplo n.º 12
0
    def test_stationarity(self, timeseries, window, return_plot=False):
        """Run the ADF test (AIC lag selection) and return it as a labelled Series.

        The Series holds the test statistic, p-value, lags used, number of
        observations and one 'Critical Value (<level>)' entry per level.
        When ``return_plot`` is true the series is also plotted with its
        rolling mean and standard deviation over ``window`` points, and the
        result Series is printed.
        """
        adf = adfuller(timeseries, autolag='AIC')
        labels = ['Test Statistic', 'p-value', '#Lags Used', 'Number of Observations Used']
        dfoutput = pd.Series(adf[0:4], index=labels)
        for level, critical in adf[4].items():
            dfoutput['Critical Value (%s)' % level] = critical

        if not return_plot:
            return dfoutput

        # Rolling statistics
        rolling = timeseries.rolling(window=window, center=False)
        rolmean = rolling.mean()
        rolstd = rolling.std()

        # Plot the series with its rolling statistics
        plt.plot(timeseries, color='blue', label='Original')
        plt.plot(rolmean, color='red', label='Rolling Mean')
        plt.plot(rolstd, color='black', label='Rolling Std')
        plt.legend(loc='best')
        plt.title('Rolling Mean & Standard Deviation')
        plt.show(block=False)

        # Report the Dickey-Fuller results computed above
        print('Results of Dickey-Fuller Test:')
        print(dfoutput)
        return dfoutput
Exemplo n.º 13
0
    def test_frame_timeseries_dickey_fuller_constant_trend_squared(self):
        """Compare the frame's ADF test ("ctt" regression) against statsmodels."""
        column, regression = "logM", "ctt"
        result = self.frame.timeseries_augmented_dickey_fuller_test(
            column, max_lag=1, regression=regression)
        expected_stat, expected_p = smtsa.adfuller(
            self.pandaframe[column], maxlag=1, regression=regression)[:2]

        self.assertAlmostEqual(result.p_value, expected_p, delta=0.0001)
        self.assertAlmostEqual(result.test_stat, expected_stat, delta=0.01)
Exemplo n.º 14
0
    def test_frame_timeseries_dickey_fuller_no_constant(self):
        """Compare the frame's ADF test ("nc" regression) against statsmodels."""
        column, regression = "logM", "nc"
        result = self.frame.timeseries_augmented_dickey_fuller_test(
            column, max_lag=1, regression=regression)
        expected_stat, expected_p = smtsa.adfuller(
            self.pandaframe[column], maxlag=1, regression=regression)[:2]

        self.assertAlmostEqual(result.p_value, expected_p, delta=0.0001)
        self.assertAlmostEqual(result.test_stat, expected_stat, delta=0.01)
Exemplo n.º 15
0
def stationarity(timeseries):
    """Plot rolling statistics of ``timeseries``, save the figure, print the ADF test.

    The plot shows the series with its 12-point rolling mean and standard
    deviation and is saved as 'stationarity.png'. The title is built from
    the module-level names ``headers``, ``data`` and ``index``. The ADF
    test uses AIC lag selection and is printed rounded to 3 decimals.
    """
    # Determine rolling statistics
    rolling = timeseries.rolling(window=12)
    rol_mean = rolling.mean()
    rol_std = rolling.std()

    # Plot rolling statistics:
    fig, ax = plt.subplots()
    plt.grid(color='grey', which='major', axis='y', linestyle='--')
    plt.plot(timeseries, color='blue', label='Original', linewidth=1.25)
    plt.plot(rol_mean, color='red', label='Rolling Mean', linewidth=1.25)
    plt.plot(rol_std, color='black', label='Rolling Std', linewidth=1.25)
    plt.legend(loc='best')
    # Format the title as text; passing a tuple shows its repr on the plot.
    title = '{} {} - {}'.format(
        headers[1], data[index].iloc[0], data[index].iloc[-1])
    plt.title(title)
    # Booleans instead of "on"/"off": a non-empty string is truthy, so
    # "off" no longer hides anything on current matplotlib.
    plt.tick_params(axis="both", which="both", bottom=True, top=False,
                    labelbottom=True, left=False, right=False, labelleft=True)
    ax.spines['right'].set_visible(False)
    ax.spines['top'].set_visible(False)
    ax.xaxis.set_ticks_position('bottom')
    fig.savefig('stationarity.png', bbox_inches="tight")

    # Perform Dickey-Fuller test:
    print('Results of Dickey-Fuller Test:\n')
    df_test = adfuller(timeseries, autolag='AIC')
    df_output = pd.Series(
        df_test[0:4],
        index=['Test Statistic', 'p-value', '#Lags Used', 'No. of Observations Used'])
    for key, value in df_test[4].items():
        df_output['Critical Value (%s)' % key] = value
    print(df_output.round(3))
Exemplo n.º 16
0
def cointegration_test(symbol, etf):
    """Return the ADF result for the residual of ``instPrices[symbol]`` regressed on ``etfPrices``.

    NOTE(review): ``etf`` is accepted but never used; the regressor is the
    module-level ``etfPrices`` object as a whole. Confirm that is intended.
    """
    # Regress one variable on the other, then test the fitted residual
    # for a unit root with the Augmented Dickey-Fuller test.
    residual = sm.OLS(instPrices[symbol], etfPrices).fit().resid
    return ts.adfuller(residual)
Exemplo n.º 17
0
 def __init__(self):
     """Fit the reference ADF on ``self.x`` ("nc", ``maxlag=4``, no autolag) and record expected values."""
     adf_options = dict(regression="nc", autolag=None, maxlag=4)
     self.res1 = adfuller(self.x, **adf_options)
     self.teststat = 3.5227498
     # Stata does not return a p-value for noconstant.
     # Tau^max in MacKinnon (1994) is missing, so it is
     # assumed that its right-tail is well-behaved
     self.pvalue = 0.99999
     self.critvalues = [-2.587, -1.950, -1.617]
Exemplo n.º 18
0
    def stationary(self):
        """Evaluate whether the timeseries is stationary.

        Runs the ADF test on ``self._y_current``, prints the statistic,
        p-value and every critical value, and reports the series as
        stationary only when the statistic is below the smallest critical
        value.

        Args:
            None

        Returns:
            state: True if stationary

        """
        result = adfuller(self._y_current)
        adf = result[0]

        print('> Stationarity Test:')
        print('  ADF Statistic: {:.3f}'.format(adf))
        print('  p-value: {:.3f}'.format(result[1]))
        print('  Critical Values:')
        critical_values = []
        for level, critical in result[4].items():
            print('\t{}: {:.3f}'.format(level, critical))
            critical_values.append(critical)

        # Stricter than any single level: must beat the lowest threshold.
        return True if adf < min(critical_values) else False
Exemplo n.º 19
0
def adfuller_json(ts, autolag="AIC"):
    """
    Run the Dickey-Fuller test and return the results as plain dicts.

    Params:
        ts - a 1d np.array
        autolag - autolag parameter for adfuller, AIC by default

    Output:
        res - dict with keys stat, pval, nlags, nobs
        crit - dict with critical values, keyed 'crit<level>'

        prints a table of results as a side effect
    """
    r = adfuller(ts, autolag=autolag)
    res = dict(zip(('stat', 'pval', 'nlags', 'nobs'), (r[0], r[1], r[2], r[3])))
    crit = {'crit{0}'.format(level): value for level, value in r[4].items()}

    labels = ['Test Statistic', 'p-value', '#Lags Used', 'Number of Observations Used']
    neatoutput = pd.Series(r[0:4], index=labels)
    for level, value in r[4].items():
        neatoutput['Critical Value ({0})'.format(level)] = value
    print(neatoutput)
    return res, crit
Exemplo n.º 20
0
def is_stationary(x, p = 10):
    """Return whether the ADF test ('ctt' regression) rejects a unit root.

    The ADF null hypothesis is that the series has a unit root; it is
    rejected (series treated as stationary) when the test statistic is at
    or below the critical value for the chosen level, i.e. more negative.
    The previous code compared with ``>=``, so it returned the opposite.

    Parameters
    ----------
    x : array-like series, converted with ``np.array``
    p : significance level in percent; 1, 5 or 10

    Returns
    -------
    True if stationary at the ``p``% level, False if not, and None when
    ``p`` is not one of the supported levels.
    """
    x = np.array(x)
    result = ts.adfuller(x, regression='ctt')
    for level in (1, 5, 10):
        if p == level:
            # DFStat <= critical value  ->  stationary
            return bool(result[0] <= result[4]['%d%%' % level])
    return None
Exemplo n.º 21
0
def testStationarity(ts):
    """Run the ADF test on ``ts`` and return the results as a labelled Series."""
    adf = adfuller(ts)
    # Attach descriptive labels to the values returned by the call above.
    labels = ['Test Statistic', 'p-value', '#Lags Used', 'Number of Observations Used']
    dfoutput = pd.Series(adf[0:4], index=labels)
    for level, critical in adf[4].items():
        dfoutput['Critical Value (%s)' % level] = critical
    return dfoutput
Exemplo n.º 22
0
def passes_dftest(data):
    """Store the ADF verdict for ``data[0][1][1]`` in ``data[0][1][0]`` and return ``data``.

    The flag is True when the ADF statistic (maxlag 250, regression 'ctt',
    autolag 't-stat') is below 1.
    NOTE(review): the threshold is the constant 1, not an ADF critical
    value. Confirm that this is intended.
    """
    statistic = statmodel.adfuller(data[0][1][1], 250, 'ctt', 't-stat', False, False)[0]
    data[0][1][0] = True if statistic < 1 else False
    return data
Exemplo n.º 23
0
def runSingleMRTest(ticker):
    """Print mean-reversion diagnostics for one ticker over the past year.

    Downloads one year of Yahoo data, then prints: the ADF statistic,
    p-value and critical values (second positional argument 2, the max
    lag), the Hurst exponent, a lookback equal to the rounded half-life
    from an OLS of the price change on the lagged price, and the daily
    variance-covariance VaR at 95% confidence.

    Single-argument ``print(...)`` calls are used so the function parses on
    Python 3 and prints the same text on Python 2.
    """
    now = datetime.now()
    try:
        start = datetime(now.year - 1, now.month, now.day)
    except ValueError:
        # 29 February has no counterpart in the previous year.
        start = datetime(now.year - 1, now.month, 28)
    dr = DataReader(ticker, "yahoo", start, now)
    ts = dr['Adj Close']

    # ADF test with a lag order value of 2
    adf = ts_tool.adfuller(ts, 2)
    print(adf[0])  # adf test-statistic
    print(adf[1])  # p-value
    print(adf[4])  # 1/5/10 test statistic
    print(hurst(ts))

    # calculate half-life
    md = sm.OLS(ts.diff(), sm.add_constant(ts.shift()), missing='drop')
    mdf = md.fit()
    half_life = -np.log(2) / mdf.params[1]
    lookback = np.round(half_life)

    print(lookback)

    # calculate VaR using Variance-covariance
    c = 0.95
    rets = ts.pct_change()
    mu = np.mean(rets)
    sigma = np.std(rets)

    # The VaR returned is daily possible loss
    # to convert it to monthly, mu = mu * sqrt(20)
    # or annually, mu = mu * sqrt(250)
    alpha = norm.ppf(1 - c, mu, sigma)

    print(-alpha)
Exemplo n.º 24
0
def stationarity_test(ts):
    """@ brief Helper function to determine if a series is stationary.

    Plots the series with its 12-point rolling mean and standard deviation,
    prints the ADF results (AIC lag selection) and, for each critical
    level, prints whether the non-stationarity hypothesis is rejected
    (test statistic below that level's critical value) or accepted.

    Single-argument ``print(...)`` calls are used so the function parses on
    Python 3 and prints the same text on Python 2.
    """
    # Determining rolling mean and variance.
    rol_mean = ts.rolling(window=12, center=False).mean()
    rol_std = ts.rolling(window=12, center=False).std()

    # Plot rolling statistics.
    plt.plot(ts, color='#da4264', label='Original')
    plt.plot(rol_mean, color='#391a5a', label='Rolling Mean')
    plt.plot(rol_std, color='#369acd', label='Rolling Std')
    plt.legend(loc='best')
    plt.title('Rolling Mean and Standard Deviation')
    plt.show()

    # Perform Dickey-Fuller test:
    print('Results of Dickey-Fuller Test:')
    dftest = adfuller(ts, autolag='AIC')
    dfoutput = pd.Series(dftest[0:4], index=['Test Statistic',
                                             'p-value',
                                             '#Lags Used',
                                             'Number of Observations Used'])
    for key, value in dftest[4].items():
        dfoutput['Critical Value (%s)' % key] = value
    print(dfoutput)
    print("\nConclusion:")
    for key, value in dftest[4].items():
        if dfoutput['Test Statistic'] < dfoutput['Critical Value (%s)' % key]:
            verdict = "Non-stationary series: Reject"
        else:
            verdict = "Non-stationary series: Accept"
        # One line per level, matching the old trailing-comma print output.
        print("%s at %s level" % (verdict, key))
Exemplo n.º 25
0
 def test_stationary(self, securities_list, beg_date, end_date):
     """Check the stationarity of price series with the ADF test.

     Parameters:
         securities_list: one security or a list of securities; each must
             provide ``get_prices(beg_date, end_date)``.
         beg_date, end_date: period passed to ``get_prices``.

     Returns: always a list, with one entry per security in input order
     (a single security gives a one-element list). Each entry is the value
     of ``BasicMR.simplify_adf_results(p_value, statistic, 5% critical
     value)``; presumably True for stationary and False otherwise, but
     verify against that helper.

     Each price series is printed before it is tested. The ADF test uses
     regression "c" and AIC lag selection.
     """
     if isinstance(securities_list, list):
         securities = securities_list
     else:
         securities = [securities_list]

     prices = [security.get_prices(beg_date, end_date) for security in securities]

     results = []
     for price_series in prices:
         print(price_series)
         results.append(ts.adfuller(price_series, regression="c", autolag='AIC'))

     return [
         BasicMR.simplify_adf_results(result[1], result[0], float(result[4]['5%']))
         for result in results
     ]
Exemplo n.º 26
0
 def teste_estacionariedade(self, timeseries):

     '''
     Test the stationarity of a series with the adfuller test.
     :param: timeseries: time series, array
     :return: nothing; prints the test statistics and shows a plot

     NOTE(review): the first row is dropped before the test and the plot of
     the original values, while the rolling statistics are computed on the
     full series. Confirm that this offset is intended.
     '''

     # Rolling statistics over the full series
     frame = pd.DataFrame(timeseries)
     rolmean = frame.rolling(window=12, center=False).mean()
     rolstd = frame.rolling(window=12, center=False).std()

     # Dickey-Fuller test on the values after the first row
     print('Results of Dickey-Fuller Test:')
     values = frame[1:].values
     adf = adfuller(values, autolag='AIC')
     labels = ['Test Statistic','p-value','#Lags Used','Number of Observations Used']
     dfoutput = pd.Series(adf[0:4], index=labels)
     for level, critical in adf[4].items():
         dfoutput['Critical Value (%s)'%level] = critical
     print(dfoutput)

     # Plot the values with the rolling statistics
     plt.plot(values, color='blue',label='Original')
     plt.plot(rolmean, color='red', label='Rolling Mean')
     plt.plot(rolstd, color='black', label = 'Rolling Std')
     plt.legend(loc='best')
     plt.title('Rolling Mean & Standard Deviation')
     plt.show()
Exemplo n.º 27
0
def is_stationary(ts, test_window):
    """
    Visual and statistical stationarity check; prints and plots, returns nothing.

    Plots ``ts`` with its rolling mean and standard deviation over
    ``test_window`` points, then prints the ADF test results (AIC lag
    selection). ``ts`` must be a pandas object providing ``.rolling()``.

    The test now runs on ``ts``; the earlier code referenced an undefined
    name ``timeseries``. The rolling statistics use the ``.rolling()``
    method instead of the removed ``pd.rolling_mean`` / ``pd.rolling_std``.
    """
    # Determine the rolling statistics
    rolling = ts.rolling(window=test_window)
    rol_mean = rolling.mean()
    rol_std = rolling.std()

    # Plot rolling statistics:
    plt.plot(ts, color="blue", label="Original")
    plt.plot(rol_mean, color="red", label="Rolling Mean")
    plt.plot(rol_std, color="black", label="Rolling Std")
    plt.legend(loc="best")
    plt.title("Rolling Mean & Standard Deviation")
    plt.show()

    # Perform the Dickey-Fuller test and label its first four outputs
    print("Results of Dickey-Fuller Test:")
    dftest = adfuller(ts, autolag="AIC")
    dfoutput = pd.Series(dftest[0:4], index=["Test Statistic", "p-value", "#Lags Used", "Number of Observations Used"])
    for key, value in dftest[4].items():
        dfoutput["Critical Value (%s)" % key] = value
    print(dfoutput)
Exemplo n.º 28
0
def ADF(ticker,start,end):
    """Download Yahoo prices for ``ticker``, print the ADF verdict, return the data.

    The ADF test runs on the 'Adj Close' column with 1 as its second
    positional argument. The strictest critical level (1%, 5%, 10%) that
    the statistic falls below is reported; the downloaded frame is
    returned in every case.
    """
    print('ADF')

    stock = DataReader(ticker, "yahoo", start, end)

    result = ts.adfuller(stock['Adj Close'], 1)
    print(result)
    print('')

    statistic, critical = result[0], result[4]
    thresholds = [(level, critical[level]) for level in ('1%', '5%', '10%')]

    verdict = 'Cannot reject Null Hypothesis'
    for level, threshold in thresholds:
        if statistic < threshold:
            verdict = 'Lesser than ' + level
            break

    print(verdict)
    print('-----------------------------------------')
    return stock
Exemplo n.º 29
0
def test_stationarity(timeseries):
    """Plot 12-point rolling statistics of ``timeseries`` and print the ADF test.

    ``timeseries`` must be a pandas object providing ``.rolling()``; the
    removed ``pd.rolling_mean`` / ``pd.rolling_std`` functions are no
    longer used. Single-argument ``print(...)`` calls keep the output
    identical on Python 2 and make the function parse on Python 3.
    """
    # http://www.seanabu.com/2016/03/22/time-series-seasonal-ARIMA-model-in-python/

    # Determine rolling statistics
    rolling = timeseries.rolling(window=12)
    rolmean = rolling.mean()
    rolstd = rolling.std()

    # Plot rolling statistics:
    plt.figure(figsize=(12, 8))
    plt.plot(timeseries, color='blue', label='Original')
    plt.plot(rolmean, color='red', label='Rolling Mean')
    plt.plot(rolstd, color='black', label='Rolling Std')
    plt.legend(loc='best')
    plt.title('Rolling Mean & Standard Deviation')
    plt.show()

    # Perform Dickey-Fuller test:
    print('Results of Dickey-Fuller Test:')
    dftest = adfuller(timeseries, autolag='AIC')
    dfoutput = pd.Series(
        dftest[0:4],
        index=[
            'Test Statistic',
            'p-value',
            '#Lags Used',
            'Number of Observations Used'
        ]
    )
    for key, value in dftest[4].items():
        dfoutput['Critical Value (%s)' % key] = value
    print(dfoutput)
Exemplo n.º 30
0
 def get_adf_test(self, x, p):
     """Return ``(passed, pvalue)`` where ``passed`` is True when the ADF p-value of ``x`` is below ``p``."""
     pvalue = ts.adfuller(x)[1]
     return (True if pvalue < p else False), pvalue
Exemplo n.º 31
0
def check_stationarity(df):
    """Return a DataFrame of ADF p-values ('ct' regression) per column of ``df``, sorted ascending.

    The result has columns 'ticker' (the key from ``df.items()``) and
    'adf' (the p-value).
    """
    rows = [
        [ticker, adfuller(prices, regression='ct')[1]]
        for ticker, prices in df.items()
    ]
    return pd.DataFrame(rows, columns=['ticker', 'adf']).sort_values('adf')
    "gyr_phone_y_freq_0.9_Hz_ws_40", "pca_3", "pca_3_temp_mean_ws_120",
    "pca_4_temp_mean_ws_120", "acc_phone_z_freq_0.9_Hz_ws_40", "pca_5",
    "pca_4", "acc_phone_y_freq_0.9_Hz_ws_40", "gyr_phone_z_freq_0.5_Hz_ws_40",
    "gyr_phone_x_freq_0.1_Hz_ws_40"
]
# Candidate feature sets to compare; presumably paired by position with the
# display names in feature_names (both hold five entries).
possible_feature_sets = [
    basic_features, features_after_chapter_3, features_after_chapter_4,
    features_after_chapter_5, selected_features
]
feature_names = [
    'initial set', 'Chapter 3', 'Chapter 4', 'Chapter 5', 'Selected features'
]

# Let us first study whether the time series is stationary and what the autocorrelations are.

# ADF test on the acc_phone_x column with AIC lag selection.
# NOTE(review): dftest is not used in the visible part of this script.
dftest = adfuller(dataset['acc_phone_x'], autolag='AIC')

# NOTE(review): plt.Figure() instantiates the Figure class directly;
# plt.figure() may have been intended. Confirm.
plt.Figure()
autocorrelation_plot(dataset['acc_phone_x'])
DataViz.save(plt)
plt.show()

# Now let us focus on the learning part.

learner = TemporalRegressionAlgorithms()
# NOTE(review): this name shadows the Python builtin eval().
eval = RegressionEvaluation()

# We repeat the experiment a number of times to get a bit more robust data as the initialization of e.g. the NN is random.

repeats = 3
Exemplo n.º 33
0
# Label and show the current plot (axis labels are Dutch runtime strings).
plt.xlabel('JAREN')
plt.ylabel('RUIS')
plt.show()

# Plot `result` (assigned earlier, outside this excerpt) on a 10x10 figure.
plt.rcParams.update({'figure.figsize': (10, 10)})
result.plot().suptitle('Multiplicatieve Decompositie', fontsize=12)
plt.show()

# NOTE(review): len(y-11) equals len(y), so this slice keeps all of y;
# len(y)-11 may have been intended. Confirm before changing.
yshow = y[:len(y-11)]
# Hold out the last 12 observations as the test set.
y, ytest = y[:(len(y)-12)], y[(len(y)-12):len(y)]
# bfill (backward fill) replaces a missing value with the next valid
# observation. The original author notes that the incident data have no
# missing values, so this step is only a safeguard.
y = y.fillna(y.bfill())

# Augmented Dickey-Fuller test on the training part of the series.
result = adfuller(y)
print('ADF Statistic: %f' % result[0])
print('p-value: %f' % result[1])
print('Critical Values:')
for key, value in result[4].items():
	print('\t%s: %.3f' % (key, value))

# Stepwise seasonal ARIMA search with seasonal period m=12 and fixed
# differencing orders d=1, D=1.
stepwise_model = auto_arima(y, start_p=1, start_q=1,
                           max_p=3, max_q=3, m=12,
                           start_P=0, seasonal=True,
                           d=1, D=1, trace=True,
                           error_action='ignore',  
                           suppress_warnings=True, 
                           stepwise=True)

print(stepwise_model.aic())
Exemplo n.º 34
0
 def __init__(self):
     """Fit the reference ADF on ``self.y`` ("nc", ``maxlag=1``, no autolag) and record expected values."""
     adf_options = dict(regression="nc", autolag=None, maxlag=1)
     self.res1 = adfuller(self.y, **adf_options)
     self.teststat = -2.4511596
     # Stata does not return a p-value for noconstant;
     # this value is just taken from our results.
     self.pvalue = 0.013747
     self.critvalues = [-2.587, -1.950, -1.617]
Exemplo n.º 35
0
# Script entry point: download two price series, estimate a hedge ratio by
# regression and run the ADF test on the residual spread.
if __name__ == "__main__":
    start = datetime.datetime(2012, 1, 1)
    end = datetime.datetime(2013, 1, 1)

    arex = web.DataReader("AREX", "yahoo", start, end)
    wll = web.DataReader("WLL", "yahoo", start, end)

    # Combine both adjusted-close series on AREX's date index.
    df = pd.DataFrame(index=arex.index)
    df["AREX"] = arex["Adj Close"]
    df["WLL"] = wll["Adj Close"]

    # Plot the two time series
    plot_price_series(df, "AREX", "WLL")

    # Display a scatter plot of the two time series
    plot_scatter_series(df, "AREX", "WLL")

    # Calculate optimal hedge ratio "beta"
    # NOTE(review): `ols` is defined outside this excerpt; the y=/x=
    # keywords and res.beta.x look like the legacy pandas OLS interface.
    # Confirm it is still available in the target environment.
    res = ols(y=df['WLL'], x=df["AREX"])
    beta_hr = res.beta.x

    # Calculate the residuals of the linear combination
    df["res"] = df["WLL"] - beta_hr * df["AREX"]

    # Plot the residuals
    plot_residuals(df)

    # Calculate and output the CADF test on the residuals
    cadf = ts.adfuller(df["res"])
    pprint.pprint(cadf)
Exemplo n.º 36
0
 def __init__(self):
     """Fit the reference ADF on ``self.y`` ("ct", ``maxlag=1``, no autolag) and record expected values."""
     adf_options = dict(regression="ct", autolag=None, maxlag=1)
     self.res1 = adfuller(self.y, **adf_options)
     # Expected statistic, p-value and critical values for this fit.
     self.teststat = -4.425093
     self.pvalue = .00199633
     self.critvalues = [-4.006, -3.437, -3.137]
Exemplo n.º 37
0
 def __init__(self):
     """Fit the reference ADF on ``self.x`` ("ct", ``maxlag=4``, no autolag) and record expected values."""
     adf_options = dict(regression="ct", autolag=None, maxlag=4)
     self.res1 = adfuller(self.x, **adf_options)
     # Expected statistic, p-value and critical values for this fit.
     self.teststat = -1.8566374
     self.pvalue = .67682968
     self.critvalues = [-4.007, -3.437, -3.137]
Exemplo n.º 38
0
    df = web.DataReader(stock_code, data_source='yahoo', start=start_date, end=end_date)
    return df

# stock_code='000001'   # stock code: Ping An Bank
# stock_code='600519'   # stock code: Kweichow Moutai
stock_code='600522'   # stock code: Zhongtian Technology
datalen=365
content='Close'
data=get_stock_history(stock_code,datalen,content)
print(data)

# Work on the first four columns only.
subdata = data.iloc[:,:4]
# print(subdata.shape)

# Stationarity test: ADF p-value of the fourth column (1 is the second
# positional argument of adfuller, the maximum lag).
pvalue = stat.adfuller(subdata.values[:,3], 1)[1]
print("指标 ",data.columns[3]," 单位根检验的p值为:",pvalue)

# Take first differences and run the stationarity test again.
subdata_diff1 = subdata.iloc[1:,:].values - subdata.iloc[:-1,:].values
pvalue = stat.adfuller(subdata_diff1[:,3], 1)[1]
print("指标 ",data.columns[3]," 单位根检验的p值为:",pvalue)

# Increase the model order one at a time, starting from 1.
rows, cols = subdata_diff1.shape
aicList = []
lmList = []

for p in range(1,11):
    baseData = None
    for i in range(p, rows):
Exemplo n.º 39
0
        #ADF Test
        Open_table_index = []
        int_w = []
        for i in range(len(arg_mix)):
            if arg_mix[i] != 0:
                w1, w2 = num_weight(table.w1.iloc[i], table.w2.iloc[i],
                                    Open_price[i, 0], Open_price[i, 1], maxi,
                                    capital)
                if Adf:
                    if tick:
                        ADF_spread = w1 * np.log( test_stock1[i,:(150+arg_test[i])] ) +\
                                     w2 * np.log( test_stock2[i,:(150+arg_test[i])] )
                    else:  #此處僅為表示tick與非tick算式一樣但想法上有本質上的不同
                        ADF_spread = w1 * np.log( test_stock1[i,:(150+arg_test[i])] ) +\
                                     w2 * np.log( test_stock2[i,:(150+arg_test[i])] )
                    if adfuller(ADF_spread, regression='c')[1] <= 0.05:
                        Open_table_index.append(i)
                        int_w.append([w1, w2])
                else:
                    Open_table_index.append(i)
                    int_w.append([w1, w2])

        Open_table_index = np.array(Open_table_index)
        #delete useless table row
        table = table.iloc[Open_table_index, :]
        arg_test = arg_test[Open_table_index]
        arg_mix = arg_mix[Open_table_index]
        ClPos = ClPos[Open_table_index]
        ClPos_test = ClPos_test[Open_table_index]
        LongOrShort = LongOrShort[Open_table_index]
        Open_price = Open_price[Open_table_index]
Exemplo n.º 40
0
def granger(array_X, array_Y, X_name, Y_name, para_set, path_to_output):
    """Run Granger-causality and ADF tests over every admissible segment.

    The sample is cut at multiples of ``para_set.step`` (plus a final cut at
    ``n_sample - 1``).  For every pair of cut points ``(i, j)`` with ``i < j``
    whose segment length lies in ``[min_segment_len, max_segment_len]`` the
    function runs a Granger test on the "YX" column ordering, conditionally a
    second test on the "XY" ordering, and conditionally ADF tests on the Y and
    X columns.  All p-values are stored in ``(n_step + 1, n_step + 1)``
    matrices indexed by ``[i, j]`` and handed to ``output.output_causal``.

    Sentinel values in the result matrices: ``-2`` means the cell was never
    evaluated, ``-1`` means the test was deliberately skipped.

    Parameters
    ----------
    array_X, array_Y : array-like
        Joined with ``np.concatenate(..., axis=1)``, so both must be at least
        2-D with the same number of rows.  Column 0 / column 1 of the XY
        concatenation are used as X / Y for the ADF tests
        (presumably each input has a single column -- TODO confirm).
    X_name, Y_name : str
        Labels forwarded to ``output.output_causal``; ``X_name`` is printed.
    para_set : object
        Must expose ``step``, ``lag``, ``test_mode``, ``significant_thres``,
        ``min_segment_len``, ``max_segment_len`` and
        ``cal_stationary_separately``.
    path_to_output :
        Forwarded unchanged to ``output.output_causal``.

    Returns
    -------
    tuple
        ``(total_time, time_granger, time_adf, total_cnt_segment_YX,
        cnt_prune_YX, time_prune_YX, total_cnt_segment_XY, cnt_prune_XY,
        time_prune_XY)``.

    Raises
    ------
    ValueError
        If ``para_set.test_mode`` is not one of ``'standard'``,
        ``'fast_version_1'``, ``'fast_version_2'``, ``'fast_version_3'`` and
        at least one segment has to be evaluated.
    """
    step = para_set.step
    lag = para_set.lag
    test_mode = para_set.test_mode
    significant_thres = para_set.significant_thres
    min_segment_len = para_set.min_segment_len
    max_segment_len = para_set.max_segment_len

    n_sample = len(array_X)

    print('sample size: ' + str(n_sample))

    # ------------------------------ initialization ------------------------------

    cnt_prune_YX = cnts_prune(0, 0, 0, 0)
    cnt_prune_XY = cnts_prune(0, 0, 0, 0)

    time_prune_XY = time_prune(0, 0, 0, 0)
    time_prune_YX = time_prune(0, 0, 0, 0)

    print(X_name)

    time1 = timeit.default_timer()

    time_granger = 0
    time_adf = 0

    array_YX = np.concatenate((array_Y, array_X), axis=1)
    array_XY = np.concatenate((array_X, array_Y), axis=1)

    # Cut points: 0, step, 2*step, ..., and finally the last sample index.
    n_step = int(n_sample / step - 1)
    list_segment_split = [step * i for i in range(n_step)]
    list_segment_split.append(n_sample - 1)

    total_cnt_segment_YX = 0
    total_cnt_segment_XY = 0
    total_cnt_segment_adf = 0
    total_cnt_segment_cal_adf = 0
    total_cnt_segment_examine_adf_Y = 0

    # -2 marks "never evaluated"; evaluated cells hold a p-value or -1 (skipped).
    array_results_YX = np.full((n_step + 1, n_step + 1), -2, dtype=float)
    array_results_XY = np.full((n_step + 1, n_step + 1), -2, dtype=float)

    array_adf_results_X = np.full((n_step + 1, n_step + 1), -2, dtype=float)
    array_adf_results_Y = np.full((n_step + 1, n_step + 1), -2, dtype=float)

    # Lagged design matrices are built once and sliced per segment below.
    dta_YX, dtaown_YX, dtajoint_YX = parts.get_lagged_data(array_YX,
                                                           lag,
                                                           addconst=True,
                                                           verbose=False)
    dta_XY, dtaown_XY, dtajoint_XY = parts.get_lagged_data(array_XY,
                                                           lag,
                                                           addconst=True,
                                                           verbose=False)

    # Non-zero column flags, maintained to update the degrees of freedom.
    if_non_zero_columns_YX = np.zeros(np.shape(dtajoint_YX)[1])
    if_non_zero_columns_XY = np.zeros(np.shape(dtajoint_XY)[1])

    def _standard_XY(seg_start, seg_end):
        # Plain (unpruned) Granger test on the XY ordering for one segment.
        return granger_std.grangercausalitytests(
            dta_XY[seg_start:seg_end],
            dtaown_XY[seg_start:seg_end],
            dtajoint_XY[seg_start:seg_end],
            lag,
            addconst=True,
            verbose=False)

    # ------------------------------- segment scan -------------------------------

    for i in range(n_step):
        start = list_segment_split[i]

        print(str(start) + '/' + str(len(array_YX)))

        # Incremental state of the pruned tests is reset for every new start.
        reset_cnt_YX = -1
        res2down_YX = None
        res2djoint_YX = None
        res2down_ssr_upper_YX = 0
        res2down_ssr_lower_YX = 0
        res2djoint_ssr_upper_YX = 0
        res2djoint_ssr_lower_YX = 0
        res2djoint_df_resid_YX = 0

        reset_cnt_XY = -1
        res2down_XY = None
        res2djoint_XY = None
        res2down_ssr_upper_XY = 0
        res2down_ssr_lower_XY = 0
        res2djoint_ssr_upper_XY = 0
        res2djoint_ssr_lower_XY = 0
        res2djoint_df_resid_XY = 0

        for j in range(i + 1, n_step + 1):

            end = list_segment_split[j]

            # NOTE(review): the lagged arrays are presumably `lag` rows shorter
            # than the raw data, hence the shifted end index -- confirm against
            # parts.get_lagged_data.
            dta_start = start
            dta_end = end - lag

            segment_len = len(array_YX[start:end, :])
            if segment_len < min_segment_len or segment_len > max_segment_len:
                # Segment is not tested, but the non-zero flags still have to
                # absorb the rows of the step that was just passed over.
                if_non_zero_columns_YX = np.logical_or(
                    np.sum(dtajoint_YX[dta_end - step:dta_end, :], axis=0) !=
                    0, if_non_zero_columns_YX)
                if_non_zero_columns_XY = np.logical_or(
                    np.sum(dtajoint_XY[dta_end - step:dta_end, :], axis=0) !=
                    0, if_non_zero_columns_XY)

                continue

            # --------------------------------- F test ---------------------------------

            time3 = timeit.default_timer()

            if test_mode == 'standard':

                p_value_YX, res2down_YX, res2djoint_YX = granger_std.grangercausalitytests(
                    dta_YX[dta_start:dta_end],
                    dtaown_YX[dta_start:dta_end],
                    dtajoint_YX[dta_start:dta_end],
                    lag,
                    addconst=True,
                    verbose=False)
                if p_value_YX < significant_thres:
                    p_value_XY, res2down_XY, res2djoint_XY = _standard_XY(
                        dta_start, dta_end)
                else:
                    p_value_XY = -1

            elif test_mode == 'fast_version_1':  # only check F_upper

                (p_value_YX, res2down_YX, res2djoint_YX,
                 res2down_ssr_upper_YX, res2djoint_ssr_lower_YX,
                 res2djoint_df_resid_YX, reset_cnt_YX,
                 if_non_zero_columns_YX) = prune.grangercausalitytests_check_F_upper(
                    dta_YX[dta_start:dta_end],
                    dtaown_YX[dta_start:dta_end],
                    dtajoint_YX[dta_start:dta_end],
                    lag,
                    res2down_YX,
                    res2djoint_YX,
                    res2down_ssr_upper_YX,
                    res2djoint_ssr_lower_YX,
                    res2djoint_df_resid_YX,
                    if_non_zero_columns_YX,
                    significant_thres,
                    step,
                    reset_cnt_YX,
                    addconst=True,
                    verbose=False)
                if p_value_YX < significant_thres and p_value_YX >= 0:
                    p_value_XY, res2down_XY, res2djoint_XY = _standard_XY(
                        dta_start, dta_end)
                else:
                    p_value_XY = -1

            elif test_mode == 'fast_version_2':  # check F_upper then check F_lower

                total_cnt_segment_YX += 1

                (p_value_YX, res2down_YX, res2djoint_YX,
                 res2down_ssr_upper_YX, res2down_ssr_lower_YX,
                 res2djoint_ssr_upper_YX, res2djoint_ssr_lower_YX,
                 res2djoint_df_resid_YX, reset_cnt_YX, cnt_prune_YX,
                 time_prune_YX,
                 if_non_zero_columns_YX) = prune.grangercausalitytests_check_F_upper_lower(
                    dta_YX[dta_start:dta_end],
                    dtaown_YX[dta_start:dta_end],
                    dtajoint_YX[dta_start:dta_end],
                    lag,
                    res2down_YX,
                    res2djoint_YX,
                    res2down_ssr_upper_YX,
                    res2down_ssr_lower_YX,
                    res2djoint_ssr_upper_YX,
                    res2djoint_ssr_lower_YX,
                    res2djoint_df_resid_YX,
                    if_non_zero_columns_YX,
                    significant_thres,
                    step,
                    reset_cnt_YX,
                    cnt_prune_YX,
                    time_prune_YX,
                    addconst=True,
                    verbose=False)

                if p_value_YX < significant_thres and p_value_YX >= 0:
                    total_cnt_segment_XY += 1
                    p_value_XY, res2down_XY, res2djoint_XY = _standard_XY(
                        dta_start, dta_end)
                else:
                    p_value_XY = -1

            elif test_mode == 'fast_version_3':  # check YX then check XY

                total_cnt_segment_YX += 1

                (p_value_YX, res2down_YX, res2djoint_YX,
                 res2down_ssr_upper_YX, res2down_ssr_lower_YX,
                 res2djoint_ssr_upper_YX, res2djoint_ssr_lower_YX,
                 res2djoint_df_resid_YX, reset_cnt_YX, cnt_prune_YX,
                 time_prune_YX,
                 if_non_zero_columns_YX) = prune.grangercausalitytests_check_F_upper_lower(
                    dta_YX[dta_start:dta_end],
                    dtaown_YX[dta_start:dta_end],
                    dtajoint_YX[dta_start:dta_end],
                    lag,
                    res2down_YX,
                    res2djoint_YX,
                    res2down_ssr_upper_YX,
                    res2down_ssr_lower_YX,
                    res2djoint_ssr_upper_YX,
                    res2djoint_ssr_lower_YX,
                    res2djoint_df_resid_YX,
                    if_non_zero_columns_YX,
                    significant_thres,
                    step,
                    reset_cnt_YX,
                    cnt_prune_YX,
                    time_prune_YX,
                    addconst=True,
                    verbose=False)

                if p_value_YX < significant_thres and p_value_YX >= 0:
                    total_cnt_segment_XY += 1
                    (p_value_XY, res2down_XY, res2djoint_XY,
                     res2down_ssr_upper_XY, res2down_ssr_lower_XY,
                     res2djoint_ssr_upper_XY, res2djoint_ssr_lower_XY,
                     res2djoint_df_resid_XY, reset_cnt_XY, cnt_prune_XY,
                     time_prune_XY,
                     if_non_zero_columns_XY) = prune.grangercausalitytests_check_F_upper_lower(
                        dta_XY[dta_start:dta_end],
                        dtaown_XY[dta_start:dta_end],
                        dtajoint_XY[dta_start:dta_end],
                        lag,
                        res2down_XY,
                        res2djoint_XY,
                        res2down_ssr_upper_XY,
                        res2down_ssr_lower_XY,
                        res2djoint_ssr_upper_XY,
                        res2djoint_ssr_lower_XY,
                        res2djoint_df_resid_XY,
                        if_non_zero_columns_XY,
                        significant_thres,
                        step,
                        reset_cnt_XY,
                        cnt_prune_XY,
                        time_prune_XY,
                        addconst=True,
                        verbose=False)
                else:
                    p_value_XY = -1

                    # XY was skipped for this segment, but its bounds must
                    # still advance so a later pruned XY test stays valid.
                    if res2down_XY is not None and res2djoint_XY is not None:
                        (res2down_ssr_upper_XY, res2down_ssr_lower_XY,
                         res2djoint_ssr_upper_XY, res2djoint_ssr_lower_XY,
                         res2djoint_df_resid_XY,
                         if_non_zero_columns_XY) = prune.update_bound(
                            dta_XY[dta_start:dta_end],
                            dtaown_XY[dta_start:dta_end],
                            dtajoint_XY[dta_start:dta_end],
                            res2down_XY,
                            res2djoint_XY,
                            res2down_ssr_upper_XY,
                            res2down_ssr_lower_XY,
                            res2djoint_ssr_upper_XY,
                            res2djoint_ssr_lower_XY,
                            res2djoint_df_resid_XY,
                            if_non_zero_columns_XY,
                            lag,
                            step,
                            addconst=True,
                            verbose=False)

                        if res2down_XY.ssr > res2down_ssr_upper_XY or res2djoint_XY.ssr > res2djoint_ssr_upper_XY:
                            print('error')

            else:
                # Previously this fell through and failed later with an
                # UnboundLocalError on p_value_YX.
                raise ValueError('unknown test_mode: ' + repr(test_mode))

            array_results_YX[i, j] = p_value_YX
            array_results_XY[i, j] = p_value_XY

            time4 = timeit.default_timer()

            time_granger += (time4 - time3)

            # ----------------------------- stationarity test -----------------------------

            time5 = timeit.default_timer()

            if para_set.cal_stationary_separately == 0:
                # ADF is only worth running when YX is significant and XY is not.
                total_cnt_segment_adf += 1
                examine_adf = (p_value_YX < significant_thres
                               and p_value_YX >= 0
                               and p_value_XY > significant_thres)
            else:
                examine_adf = True

            if examine_adf:
                total_cnt_segment_examine_adf_Y += 1

                pvalue_Y = adfuller(array_XY[start:end, 1], lag)[1]

                # X is only tested once Y has been found stationary.
                if pvalue_Y < significant_thres and pvalue_Y >= 0:
                    pvalue_X = adfuller(array_XY[start:end, 0], lag)[1]
                    total_cnt_segment_cal_adf += 1
                else:
                    pvalue_X = -1
            else:
                pvalue_Y = -1
                pvalue_X = -1

            array_adf_results_Y[i, j] = pvalue_Y
            array_adf_results_X[i, j] = pvalue_X

            time6 = timeit.default_timer()

            time_adf += (time6 - time5)

    time2 = timeit.default_timer()

    total_time = time2 - time1

    print('total time: ' + str(total_time))

    time_set = [time1, time2, time_granger, time_adf]
    cnt_set = [
        total_cnt_segment_YX, cnt_prune_YX, time_prune_YX,
        total_cnt_segment_XY, cnt_prune_XY, time_prune_XY,
        total_cnt_segment_adf, total_cnt_segment_cal_adf,
        total_cnt_segment_examine_adf_Y
    ]

    output.output_causal(path_to_output, X_name, Y_name, time_set, cnt_set,
                         array_results_YX, array_results_XY,
                         array_adf_results_X, array_adf_results_Y,
                         list_segment_split, para_set)

    return total_time, time_granger, time_adf, total_cnt_segment_YX, cnt_prune_YX, time_prune_YX, total_cnt_segment_XY, cnt_prune_XY, time_prune_XY
Exemplo n.º 41
0
 def setup_class(cls):
     """Run ADF on ``cls.y`` (constant + trend, fixed lag 1) and store the expected values."""
     adf_options = dict(regression="ct", autolag=None, maxlag=1)
     cls.res1 = adfuller(cls.y, **adf_options)
     # Expected statistic, p-value and critical values the result is compared to.
     cls.teststat = -4.425093
     cls.pvalue = 0.00199633
     cls.critvalues = [-4.006, -3.437, -3.137]
Exemplo n.º 42
0
# Calculate a stationarity (ADF) test for the daily female births series.
from pandas import read_csv
from statsmodels.tsa.stattools import adfuller

# The `squeeze=True` keyword of read_csv was deprecated in pandas 1.4 and
# removed in 2.0; squeezing the single data column explicitly gives the same
# Series on every pandas version.
series = read_csv('daily-total-female-births.csv',
                  header=0,
                  index_col=0,
                  parse_dates=True).squeeze('columns')
X = series.values
result = adfuller(X)
# result[0] is the test statistic, result[1] the p-value and result[4] the
# dict of critical values keyed by significance level.
print('ADF Statistic: %f' % result[0])
print('p-value: %f' % result[1])
print('Critical Values:')
for key, value in result[4].items():
    print('\t%s: %.3f' % (key, value))
Exemplo n.º 43
0
# 3. Unit-root test: Dickey-Fuller test
# A unit-root test checks whether the series contains a unit root; if it does,
# the series is a non-stationary time series.

from statsmodels.tsa.stattools import adfuller
import pandas as pd
import numpy as np

df = pd.read_csv("../production.csv")
data = np.array(df["production"].values)
result = adfuller(data)

# Name the pieces of the result tuple before building the report.
test_statistic, p_value, lags_used, n_observations, critical_values = result[:5]
output = {
    'Test Statistic Value': test_statistic,
    'p-value': p_value,
    'Lags Used': lags_used,
    'Number of Observations Used': n_observations,
    'Critical Value(1%)': critical_values['1%'],
    'Critical Value(5%)': critical_values['5%'],
    'Critical Value(10%)': critical_values['10%']
}
print(output)
# Returned fields:
# Test Statistic Value: the test statistic.
# p-value: probability associated with the statistic; it must be below the
#   chosen significance level to reject the null hypothesis (the closer to
#   zero, the better).
#   When the p-value is close to 0.05, decide by comparing the test statistic
#   against the critical values instead.
# Lags Used: number of lags used.
# Number of Observations Used: number of observations used in the regression.
# Critical Value(1%, 5%, 10%): statistic thresholds for rejecting the null
#   hypothesis at the respective levels.
# The null hypothesis of the ADF test is that a unit root exists; a statistic
# below the 1% critical value rejects it very significantly, i.e. the data is
# considered stationary.
# Note: the ADF statistic is usually negative but can be positive; only a
# value below the 1% critical value counts as a highly significant rejection.
Exemplo n.º 44
0
# Load the dataset and index it by month.
df = pd.read_csv('data/AirPassengers.csv')
print(df.dtypes)

df['Month'] = pd.to_datetime(df['Month'])
print(df.dtypes)

df.set_index('Month', inplace=True)

plt.plot(df['Passengers'])

# Is the data stationary?
# Dickey-Fuller test
from statsmodels.tsa.stattools import adfuller
# adfuller expects a 1-D series: pass the column explicitly instead of the
# whole frame, which only worked while the frame had exactly one column.
adf, pvalue, usedlag_, nobs_, critical_values_, icbest_ = adfuller(df['Passengers'])
print("pvalue = ", pvalue, " if above 0.05, data is not stationary")
# Since data is not stationary, we may need SARIMA and not just ARIMA

# Calendar helper columns used by the box plots below.
df['year'] = [d.year for d in df.index]
df['month'] = [d.strftime('%b') for d in df.index]
years = df['year'].unique()

# Plot yearly and monthly values as boxplot
# NOTE(review): both calls draw on the current axes; outside a notebook
# (one cell per plot) the second plot will be drawn over the first -- confirm
# the intended execution environment.
sns.boxplot(x='year', y='Passengers', data=df)
sns.boxplot(x='month', y='Passengers', data=df)


# Extract and plot trend, seasonal and residuals.
from statsmodels.tsa.seasonal import seasonal_decompose
Exemplo n.º 45
0
 def setup_class(cls):
     """Run ADF on ``cls.x`` (constant + trend, fixed lag 4) and store the expected values."""
     adf_options = dict(regression="ct", autolag=None, maxlag=4)
     cls.res1 = adfuller(cls.x, **adf_options)
     # Expected statistic, p-value and critical values the result is compared to.
     cls.teststat = -1.8566374
     cls.pvalue = 0.67682968
     cls.critvalues = [-4.007, -3.437, -3.137]
def get_engle_granger_two_step_cointegration_test(y, x):
    """Estimate the Engle & Granger two-step cointegration relationship of y and x.

    Uses a 2-step process to first estimate coefficients for the long-run relationship
        y_t = c + gamma * x_t + z_t

    and then the short-term relationship,
        y_t - y_(t-1) = alpha * z_(t-1) + epsilon_t,

    with z the found residuals of the first equation.

    Then tests cointegration by Dickey-Fuller phi=1 vs phi < 1 in
        z_t = phi * z_(t-1) + eta_t

    If this implies phi < 1, the z series is concluded to be stationary, and
    thus the series y and x are concluded to be cointegrated.

    Parameters
    ----------
    y : pd.Series
        The first time series of the pair to analyse.

    x : pd.Series
        The second time series of the pair to analyse.

    Returns
    -------
    c : float
        The constant term in the long-run relationship y_t = c + gamma * x_t + z_t. This
        describes the static shift of y with respect to gamma * x.

    gamma : float
        The gamma term in the long-run relationship y_t = c + gamma * x_t + z_t. This
        describes the ratio between the const-shifted y and x.

    alpha : float
        The alpha term in the short-run relationship y_t - y_(t-1) = alpha * z_(t-1) + epsilon. This
        gives an indication of the strength of the error correction toward the long-run mean.

    z : pd.Series
        Series of residuals z_t from the long-run relationship y_t = c + gamma * x_t + z_t, representing
        the value of the error correction term.

    adfstat : float
        The Dickey Fuller test-statistic for phi = 1 vs phi < 1 in the second equation. A more
        negative value implies the existence of stronger cointegration.

    pvalue : float
        The p-value corresponding to the Dickey Fuller test-statistic. A lower value implies
        stronger rejection of no-cointegration, thus stronger evidence of cointegration.

    """
    # Silence FutureWarning only while the model is built. catch_warnings
    # restores the caller's filters on exit instead of overwriting them with a
    # global "default" rule.
    with warnings.catch_warnings():
        warnings.simplefilter(action="ignore", category=FutureWarning)
        long_run_ols = sm.OLS(y, sm.add_constant(x))

    long_run_ols_fit = long_run_ols.fit()

    c, gamma = long_run_ols_fit.params
    z = long_run_ols_fit.resid

    # Drop the first row: both the difference of y and the lag of z are NaN there.
    short_run_ols = sm.OLS(y.diff().iloc[1:], (z.shift().iloc[1:]))
    short_run_ols_fit = short_run_ols.fit()

    # params is label-indexed; take the single coefficient by position.
    alpha = short_run_ols_fit.params.iloc[0]

    # NOTE: The p-value returned by the adfuller function assumes we do not estimate z first, but test
    # stationarity of an unestimated series directly. This assumption should have limited effect for high N, however.
    # Critical values taking this into account more accurately are provided in e.g. McKinnon (1990) and
    # Engle & Yoo (1987).

    # With autolag=None adfuller returns (stat, pvalue, usedlag, nobs, critvalues).
    adfstat, pvalue, _, _, _ = adfuller(z, maxlag=1, autolag=None)

    return c, gamma, alpha, z, adfstat, pvalue
Exemplo n.º 47
0
 def setup_class(cls):
     """Run ADF on ``cls.x`` (constant only, fixed lag 4) and store the expected values."""
     adf_options = dict(regression="c", autolag=None, maxlag=4)
     cls.res1 = adfuller(cls.x, **adf_options)
     # Expected statistic, p-value and critical values the result is compared to.
     cls.teststat = 0.97505319
     cls.pvalue = 0.99399563
     cls.critvalues = [-3.476, -2.883, -2.573]
Exemplo n.º 48
0
# fii = mod.fit()
# print(fii.summary())
#
# plt.plot(par_time, par_avg, color='blue')
# plt.plot(par_time, fii.predict(poly.fit_transform(par_time)), color='red')
# plt.ylim([-700, 14000])
# plt.title('Trajectory of $cp_{t_k}^-$')
# plt.xlabel('Time Steps')
# plt.ylabel('$cp_{t_k}^-$')
# plt.show()

from statsmodels.tsa.stattools import adfuller

# ADF test on the per-step mean of `plus_ccpp_t` (defined outside this excerpt).
par = plus_ccpp_t
# Average each entry, skipping empty ones.
# NOTE(review): `x != []` assumes every entry is a plain list -- confirm; the
# comparison is element-wise for numpy arrays.
par_avg = [np.mean(x) for x in par if x != []]
result = adfuller(par_avg)
# print('ADF Statistic: %f' % result[0])
print('p-value: %.2f' % result[1])
# print('Critical Values:')
# for key, value in result[4].items():
# 	print('\t%s: %.3f' % (key, value))

# pi_t is a quadratic function. polynomial regression
par = pioo_t
par_avg = [np.mean(x) for x in par if x != []]
# 1-based time index, one entry per averaged step.
par_time = list(range(1, len(par_avg) + 1))
# plt.plot(par_avg)

from sklearn.preprocessing import PolynomialFeatures
from sklearn.linear_model import LinearRegression
# Degree-2 feature expansion for the quadratic fit.
poly = PolynomialFeatures(degree=2)
Exemplo n.º 49
0
 def __init__(self):
     """Run ADF on ``self.y`` (constant only, fixed lag 1) and store the expected values."""
     adf_options = dict(regression="c", autolag=None, maxlag=1)
     self.res1 = adfuller(self.y, **adf_options)
     # Expected statistic, p-value and critical values the result is compared to.
     self.teststat = -4.3346988
     self.pvalue = 0.00038661
     self.critvalues = [-3.476, -2.883, -2.573]
Exemplo n.º 50
0
# Solutions
from statsmodels.tsa.stattools import kpss

# KPSS: the null hypothesis is that the series is stationary.
# The bare tuple expressions below are notebook cells that display the result.
kpss_stat, p_value, lags, crit = kpss(X_stationary)

p_value > 0.05, p_value  # Fail to reject the hypothesis of stationarity

kpss_stat, p_value, lags, crit = kpss(X_non_stationary)

p_value > 0.05, p_value  # Reject hypothesis of stationarity

kpss_stat, p_value, lags, crit = kpss(X_AR1)

p_value > 0.05, p_value  # Reject hypothesis of stationarity

from statsmodels.tsa.stattools import adfuller

# ADF: the null hypothesis is that the series has a unit root.
adf, p_value, *_ = adfuller(X_stationary)

p_value < 0.05, p_value  # Reject hypothesis of a unit root, which indicates stationarity
Exemplo n.º 51
0
 def __init__(self):
     """Run ADF on ``self.x`` (constant only, fixed lag 4) and store the expected values."""
     adf_options = dict(regression="c", autolag=None, maxlag=4)
     self.res1 = adfuller(self.x, **adf_options)
     # Expected statistic, p-value and critical values the result is compared to.
     self.teststat = 0.97505319
     self.pvalue = 0.99399563
     self.critvalues = [-3.476, -2.883, -2.573]
Exemplo n.º 52
0
def test_adfuller_maxlag_too_large(reset_randomstate):
    """adfuller must raise ValueError when maxlag=51 is requested for 100 observations."""
    sample = np.random.standard_normal(100)
    expect_error = pytest.raises(ValueError, match='maxlag must be less than')
    with expect_error:
        adfuller(sample, maxlag=51)
Exemplo n.º 53
0
    return pd.Series(diff)
 
# invert differenced value
def inverse_difference(history, yhat, interval=1):
    """Undo a lag-``interval`` difference.

    Adds the observation ``interval`` steps from the end of ``history`` back
    onto the differenced value ``yhat`` and returns the sum.
    """
    reference_value = history[-interval]
    return yhat + reference_value





# Remove the yearly seasonality by differencing at a 12-month lag.
months_in_year = 12
stationary = difference(X, months_in_year)
# The first `months_in_year` observations have no counterpart one season back.
stationary.index = dataPiece.index[months_in_year:]

# ADF test on the differenced series.
result = adfuller(stationary)
adf_statistic = result[0]
adf_pvalue = result[1]
adf_critical_values = result[4]
print('ADF Statistic: %f' % adf_statistic)
print('p-value: %f' % adf_pvalue)
print('Critical Values:')
for key, value in adf_critical_values.items():
    print('\t%s: %.3f' % (key, value))

# Persist the differenced series, then show it.
stationary.to_csv('stationary.csv')
stationary.plot()
plt.show()




Exemplo n.º 54
0
def augDickeyFuller(series, lag):
    """Return the raw result of ``ts.adfuller(series, lag)``.

    ``lag`` is passed as the second positional argument of ``ts.adfuller``.
    """
    return ts.adfuller(series, lag)
Exemplo n.º 55
0
#     dfoutput = pd.Series(dftest[0:4], index=['Test Statistic', 'p-value', '#Lags Used', 'Number of Observations Used'])
#     for key, value in dftest[4].items():
#         dfoutput['Critical Value (%s)' % key] = value
#     print dfoutput

# test_stationarity(volume)

# 12-period rolling mean and standard deviation of the volume series.
ma = volume.rolling(window=12,center=False).mean()
msd = volume.rolling(window=12,center=False).std()
plt.plot(volume,'blue')
plt.plot(ma,'green')
plt.plot(msd,'red')
plt.show()
# The moving average and moving deviation is increasing over year
adtestoutput = adfuller(volume.volume)
# Result tuple: statistic, p-value, lags used, observations used, critical values.
print('Test Statistic:      %.6f' % adtestoutput[0])
print('p-value:      %.6f' % adtestoutput[1])
print('#Lags Used:      %.6f' % adtestoutput[2])
print('Number of Observations Used      %.6f' % adtestoutput[3])
print('Critical Value (1%%)      %.6f' % adtestoutput[4]['1%'])
# The closing parenthesis after "10%" was missing in the printed label.
print('Critical Value (10%%)      %.6f' % adtestoutput[4]['10%'])
print('Critical Value (5%%)      %.6f' % adtestoutput[4]['5%'])


#####Task 3:Make a Time Series stationary – 5pts
#QA plot log
logvolume = np.log(volume)
logvolume.plot()
# NOTE(review): `pyplot` must be imported elsewhere in the file; the code
# above uses the `plt` alias -- confirm both names are in scope.
pyplot.show()
#QC log data plot
Exemplo n.º 56
0
# ADF tests on the HO and NG closing prices and on their spread.
from statsmodels.tsa.stattools import adfuller

ho_close = HO['Close']
ng_close = NG['Close']

# Each series on its own.
result_HO = adfuller(ho_close)
print("The p-value for the ADF test on HO is ", result_HO[1])
result_NG = adfuller(ng_close)
print("The p-value for the ADF test on NG is ", result_NG[1])

# Spread between 7.25 x HO and NG.
spread = 7.25 * ho_close - ng_close
result_spread = adfuller(spread)
print("The p-value for the ADF test on the spread is ", result_spread[1])
Exemplo n.º 57
0
# Test:
# #### H<sub>0</sub>: β = 1 (This is a random walk)
# #### H<sub>1</sub>: β < 1 (This is not a random walk)
# <br>
# Dickey-Fuller Test:
# #### H<sub>0</sub>: β = 0 (This is a random walk)
# #### H<sub>1</sub>: β < 0 (This is not a random walk)

# ### Augmented Dickey-Fuller test
# An augmented Dickey–Fuller test (ADF) tests the null hypothesis that a unit root is present in a time series sample. It is basically Dickey-Fuller test with more lagged changes on RHS.

# In[ ]:


# Augmented Dickey-Fuller test on volume of google and microsoft stocks 
adf = adfuller(microsoft["Volume"])
print("p-value of microsoft: {}".format(float(adf[1])))
adf = adfuller(google["Volume"])
print("p-value of google: {}".format(float(adf[1])))

# ##### As microsoft has p-value 0.0003201525 which is less than 0.05, null hypothesis is rejected and this is not a random walk.
# ##### Google has p-value 0.0000006510, which is also less than 0.05, so the null hypothesis is rejected and this is not a random walk.

# ### Generating a random walk

# In[ ]:


# Fix the random seed and set the default figure size.
seed(42)
rcParams['figure.figsize'] = 16, 6
# 1000 normal draws (mean 0, std 0.01) used as the steps of the walk.
random_walk = normal(loc=0, scale=0.01, size=1000)
Exemplo n.º 58
0
%matplotlib inline
# Load the sales series; the bare `df.head()` / `df.describe()` calls only
# display something when run as notebook cells.
df=pd.read_csv('time_series_data.csv')
df.head()

# Updating the header
df.columns=["Month","Sales"]
df.head()
df.describe()
# Use the month column as the index.
df.set_index('Month',inplace=True)

from pylab import rcParams
rcParams['figure.figsize'] = 15, 7
df.plot()

# ADF test on the sales column.
from statsmodels.tsa.stattools import adfuller
test_result=adfuller(df['Sales'])




#case4
#https://www.digitalocean.com/community/tutorials/a-guide-to-time-series-forecasting-with-arima-in-python-3
import warnings
import itertools
import pandas as pd
import numpy as np
import statsmodels.api as sm
import matplotlib.pyplot as plt
plt.style.use('fivethirtyeight')

# CO2 example dataset bundled with statsmodels.
data = sm.datasets.co2.load_pandas()
Exemplo n.º 59
0
def testStationarity(ts,
                     window,
                     initialPlotDate='',
                     finalPlotDate='',
                     saveImg=False,
                     saveDir='',
                     saveName='',
                     saveFormat='pdf'):
    """Plot rolling statistics of ``ts`` and annotate the figure with ADF results.

    Parameters
    ----------
    ts : pandas.Series
        Series to analyse; NaNs are dropped before the rolling statistics and
        the ADF test are computed.
    window : int
        Window size passed to ``rolling``.
    initialPlotDate, finalPlotDate : optional
        Slice bounds for the plotted range; a falsy value falls back to the
        first / last index entry of ``ts``.
    saveImg : bool
        When true, the figure is written to
        ``{saveDir}/{saveName}.{saveFormat}``.
    saveDir, saveName, saveFormat : str
        Output location; an empty ``saveName`` defaults to ``'<ts.name>_ADF'``.

    Returns
    -------
    (fig, ax)
        The matplotlib figure and axes that were drawn on.
    """
    initialPlotDate = initialPlotDate if initialPlotDate else ts.index[0]
    finalPlotDate = finalPlotDate if finalPlotDate else ts.index[-1]

    # Rolling statistics and the ADF test all use the NaN-free series.
    clean_ts = ts.dropna()
    rolmean = clean_ts.rolling(window=window, center=False).mean()
    rolstd = clean_ts.rolling(window=window, center=False).std()

    fig, ax = plt.subplots(figsize=(15, 10), nrows=1, ncols=1, sharex=True)
    # Plot rolling statistics:
    ax.plot(ts[initialPlotDate:finalPlotDate], color='blue', label='Original')
    ax.plot(rolmean[initialPlotDate:finalPlotDate],
            color='red',
            label='Rolling Mean')
    ax.plot(rolstd[initialPlotDate:finalPlotDate],
            color='black',
            label='Rolling Std')
    ax.legend(loc='best')
    ax.set_title('Rolling Mean & Standard Deviation')

    # Perform Dickey-Fuller test and collect the results under readable labels.
    dftest = adfuller(clean_ts, autolag='AIC')
    dfoutput = pd.Series(dftest[0:4],
                         index=[
                             'Test Statistic', 'p-value', '#Lags Used',
                             'Number of Observations Used'
                         ])
    for key, value in dftest[4].items():
        dfoutput['Critical Value (%s)' % key] = value

    # Write the results below the axes; negative y places the text under the
    # figure area, which bbox_inches='tight' includes when saving.
    plt.figtext(0.1,
                0.010,
                'Results of Dickey-Fuller Test:',
                size=14,
                verticalalignment='center')
    # (y position, format template, key in dfoutput); the field widths
    # right-align the numbers in one column.
    result_lines = [
        (-0.025, 'Test Statistic {:48.6f}', 'Test Statistic'),
        (-0.050, 'p-value {:58.6f}', 'p-value'),
        (-0.075, '#Lags Used {:51.6f}', '#Lags Used'),
        (-0.100, 'Number of Observations Used {:20.6f}',
         'Number of Observations Used'),
        (-0.125, 'Critical Value (1%) {:41.6f}', 'Critical Value (1%)'),
        (-0.150, 'Critical Value (5%) {:41.6f}', 'Critical Value (5%)'),
        (-0.175, 'Critical Value (10%) {:39.6f}', 'Critical Value (10%)'),
    ]
    for y_pos, template, key in result_lines:
        plt.figtext(0.1, y_pos, template.format(dfoutput[key]), size=14)

    if saveImg:
        # The default name is derived from the series itself (the previous
        # code referenced an undefined name `s`).
        saveName = saveName if saveName else '{}_ADF'.format(ts.name)
        fig.savefig('{}/{}.{}'.format(saveDir, saveName, saveFormat),
                    bbox_inches='tight')
    return fig, ax
Exemplo n.º 60
0
# Monthly mean resampling of the temperature column.
monthly_temp = file['Temp'].resample('MS')
data_m = monthly_temp.mean()

# Plot the resampled series.
#print(data_m.head())
data_m.plot(figsize=(15, 5))
plt.show()

# The series shows a yearly periodicity but no trend component.
rcParams['figure.figsize'] = 15, 5
decomposition = sm.tsa.seasonal_decompose(data_m, model='additive')
fig = decomposition.plot()
plt.show()

# A p-value < 0.001 (significant) means the non-stationarity hypothesis is rejected.
result = adfuller(file.Temp, autolag='AIC')
print(f'p-value: {result[1]}')

# Candidate values for the p, d and q orders: range(0, 2), i.e. 0 and 1.
p = d = q = range(0, 2)
# All (p, d, q) combinations.
pdq = list(itertools.product(p, d, q))
# The same combinations extended with a seasonal period of 12.
seasonal_pdq = [(order[0], order[1], order[2], 12) for order in pdq]

warnings.filterwarnings("ignore")

# Every pairing of a non-seasonal (p, d, q) with a seasonal (P, D, Q, 12) order.
parameters = list(itertools.product(pdq, seasonal_pdq))