Exemplo n.º 1
2
def draw_acf_pacf(ts, lags=31):
    # type: (object, object) -> object
    """Show the ACF (top) and PACF (bottom) of ``ts`` in one figure.

    :param ts: time-series values handed to ``plot_acf`` and ``plot_pacf``.
    :param lags: lag specification forwarded to both plots (default 31).
    """
    f = plt.figure(facecolor='white')
    ax1 = f.add_subplot(211)
    # Forward the caller's ``lags``; it used to be ignored in favour of a
    # hard-coded 31.
    plot_acf(ts, lags=lags, ax=ax1)
    ax2 = f.add_subplot(212)
    plot_pacf(ts, lags=lags, ax=ax2)
    plt.show()
Exemplo n.º 2
0
def draw_acf_pacf(ts, lags=1):
    """Show the ACF (top) and PACF (bottom) of ``ts`` using 31 lags.

    NOTE(review): ``lags`` is accepted but never used; both plots are drawn
    with a fixed 31 lags. Confirm the intended default before wiring it in.
    """
    figure = plt.figure(facecolor='white')
    # Each panel is created right before it is drawn: 211 = top, 212 = bottom.
    for position, plotter in ((211, plot_acf), (212, plot_pacf)):
        panel = figure.add_subplot(position)
        plotter(ts, lags=31, ax=panel)
    plt.show()
Exemplo n.º 3
0
    def draw_ACFs(df):
        """
        Draw five stacked panels from the residual columns of ``df`` (one
        line plot, three ACF plots and one PACF plot, 10 lags each) and show
        the figure.

        :param df: pandas.DataFrame
        """
        def annotate_panel(axis, text):
            # Caption placed just inside the top-right corner of the axes.
            axis.annotate(text, (1, 1), xytext=(-8, -8), ha='right', va='top',
                          size=14, xycoords='axes fraction', textcoords='offset points')

        fig, axes = plt.subplots(nrows=5, figsize=(8, 12))
        fig.tight_layout()

        # NOTE(review): this panel is captioned 'Returns' but plots the
        # Square_residuals column -- confirm which one is intended.
        axes[0].plot(df[TimeSeriesDataFrameMap.Square_residuals])
        annotate_panel(axes[0], 'Returns')

        # (plot function, column, caption) for panels 1..4, top to bottom.
        correlograms = (
            (plot_acf, TimeSeriesDataFrameMap.Residuals,
             'Residuals autocorrelation'),
            (plot_acf, TimeSeriesDataFrameMap.Abs_residuals,
             'Absolute residuals autocorrelation'),
            (plot_acf, TimeSeriesDataFrameMap.Square_residuals,
             'Square residuals autocorrelation'),
            (plot_pacf, TimeSeriesDataFrameMap.Square_residuals,
             'Square residuals partial autocorrelation'),
        )
        for panel, (plotter, column, caption) in zip(axes[1:], correlograms):
            plotter(df[column], panel, lags=10)
            annotate_panel(panel, caption)
        plt.show()
Exemplo n.º 4
0
    def d_param(self, diff):
        '''Check whether the ACF and PACF of the series at differencing order
        ``diff`` mostly lie inside the threshold band.

        The series is ``self.val`` for ``diff == 0`` and
        ``self.diff1_val`` / ``self.diff2_val`` (with NaNs dropped) for
        ``diff == 1`` / ``diff == 2``. As a side effect the ACF and PACF
        plots (40 lags) are saved side by side to ``ACF_vs_PACF.jpg``,
        ``ACF_vs_PACF_diff1.jpg`` or ``ACF_vs_PACF_diff2.jpg``.

        :param diff: differencing order, one of 0, 1 or 2.
        :return: True when at least 65% of the ACF values and at least 65%
            of the PACF values have an absolute value <= 0.08.
        :raises InvalidParamError: for any other ``diff``.
        '''
        THRESHOLD = 0.08
        MIN_FRACTION = 0.65

        # Select the series and the output file once; everything below is
        # identical for the three differencing orders.
        if diff == 0:
            series, plot_file = self.val, 'ACF_vs_PACF.jpg'
        elif diff == 1:
            series, plot_file = self.diff1_val.dropna(), 'ACF_vs_PACF_diff1.jpg'
        elif diff == 2:
            series, plot_file = self.diff2_val.dropna(), 'ACF_vs_PACF_diff2.jpg'
        else:
            raise InvalidParamError

        acf = tss.acf(series)
        pacf = tss.pacf(series)

        # acf and pacf plots
        fig = plt.figure(figsize=(12, 8))
        try:
            ax1 = fig.add_subplot(121)
            plot_acf(series, lags=40, ax=ax1)
            ax2 = fig.add_subplot(122, sharey=ax1)
            plot_pacf(series, lags=40, ax=ax2)
            fig.savefig(plot_file)
        finally:
            # Close the figure even if plotting or saving fails, so it does
            # not stay registered with pyplot.
            plt.close(fig)

        # Share of coefficients that fall inside the accepted region.
        acf_percent = np.count_nonzero(np.abs(acf) <= THRESHOLD) / float(len(acf))
        pacf_percent = np.count_nonzero(np.abs(pacf) <= THRESHOLD) / float(len(pacf))
        return (acf_percent >= MIN_FRACTION) and (pacf_percent >= MIN_FRACTION)
Exemplo n.º 5
0
def test_plot_pacf(close_figures):
    """Smoke test: ``plot_pacf`` runs on an ARMA sample, with and without
    confidence intervals (``alpha=None``)."""
    # Just test that it runs.
    figure = plt.figure()
    axis = figure.add_subplot(111)

    process = tsp.ArmaProcess(np.r_[1., -0.9], np.r_[1., 0.9])
    rng = np.random.RandomState(1234)
    sample = process.generate_sample(100, distrvs=rng.standard_normal)
    for extra in ({}, {'alpha': None}):
        plot_pacf(sample, ax=axis, **extra)
Exemplo n.º 6
0
def test_plot_pacf():
    """Smoke test: ``plot_pacf`` runs on the first 20 theoretical PACF values
    of an ARMA process, with and without confidence intervals."""
    # Just test that it runs.
    fig = plt.figure()
    try:
        ax = fig.add_subplot(111)

        ar = np.r_[1., -0.9]
        ma = np.r_[1.,  0.9]
        armaprocess = tsp.ArmaProcess(ar, ma)
        pacf = armaprocess.pacf(20)[:20]
        plot_pacf(pacf, ax=ax)
        plot_pacf(pacf, ax=ax, alpha=None)
    finally:
        # Close the figure even when a plot call raises, so a failing test
        # does not leave an open figure behind.
        plt.close(fig)
Exemplo n.º 7
0
def plotds(xt, nlag=30, fig_size=(12, 10)):
    """Plot a series together with its ACF and PACF.

    The series spans the top row of a 2x2 grid; the ACF and PACF occupy the
    bottom-left and bottom-right cells.

    :param xt: the series; anything that is not a ``pd.Series`` is wrapped
        in one.
    :param nlag: lag specification forwarded to ``plot_acf``/``plot_pacf``.
    :param fig_size: figure size passed to ``plt.figure``.
    :return: None
    """
    if not isinstance(xt, pd.Series):
        xt = pd.Series(xt)
    plt.figure(figsize=fig_size)
    layout = (2, 2)

    # Assign axes
    ax_xt = plt.subplot2grid(layout, (0, 0), colspan=2)
    ax_acf = plt.subplot2grid(layout, (1, 0))
    ax_pacf = plt.subplot2grid(layout, (1, 1))

    # Plot graphs
    xt.plot(ax=ax_xt)
    ax_xt.set_title('Time Series')
    # Use the caller's ``nlag``; it was previously ignored in favour of a
    # hard-coded 50.
    plot_acf(xt, lags=nlag, ax=ax_acf)
    plot_pacf(xt, lags=nlag, ax=ax_pacf)
    plt.tight_layout()
    return None
Exemplo n.º 8
0
 def plot_acf_pacf(self, channel, lags=20):
     '''
     Show the autocorrelation (top) and partial autocorrelation (bottom)
     of the "AVG CCV's" column for one channel.

     Input: channel and #lags to include
     Output: Plots with autocorrelation function and partial autocorrelation function.
     '''
     channel_frame = chan_filter(self.df, channel)
     # Sort by index in place before taking the column.
     channel_frame.sort_index(inplace=True)
     ccv_series = channel_frame["AVG CCV's"]
     figure = plt.figure(figsize=(12,8))
     plot_acf(ccv_series, lags=lags, ax=figure.add_subplot(211))
     plot_pacf(ccv_series, lags=lags, ax=figure.add_subplot(212))
     plt.show()
Exemplo n.º 9
0
def test_plot_pacf_irregular():
    """Smoke test: ``plot_pacf`` runs with an explicit lag array, with
    ``zero=False``, and with ``alpha=None``."""
    # Just test that it runs.
    fig = plt.figure()
    try:
        ax = fig.add_subplot(111)

        ar = np.r_[1., -0.9]
        ma = np.r_[1., 0.9]
        armaprocess = tsp.ArmaProcess(ar, ma)
        rs = np.random.RandomState(1234)
        pacf = armaprocess.generate_sample(100, distrvs=rs.standard_normal)
        plot_pacf(pacf, ax=ax, lags=np.arange(1, 11))
        plot_pacf(pacf, ax=ax, lags=10, zero=False)
        plot_pacf(pacf, ax=ax, alpha=None, zero=False)
    finally:
        # Close the figure even when a plot call raises, so a failing test
        # does not leave an open figure behind.
        plt.close(fig)
Exemplo n.º 10
0
def test_plot_pacf_kwargs():
    """Check that extra keyword arguments change the rendered PACF plot.

    The same sample is rendered three times (default, ``ls='-'`` and custom
    ``vlines_kwargs``) and the raw RGBA outputs must differ pairwise.
    """
    # Just test that it runs.
    process = tsp.ArmaProcess(np.r_[1., -0.9], np.r_[1., 0.9])
    rng = np.random.RandomState(1234)
    sample = process.generate_sample(100, distrvs=rng.standard_normal)

    def render_rgba(**plot_kwargs):
        # Render one PACF plot into memory and return the raw RGBA bytes.
        figure = plt.figure()
        axis = figure.add_subplot(111)
        plot_pacf(sample, ax=axis, **plot_kwargs)
        buffer = BytesIO()
        figure.savefig(buffer, format='rgba')
        plt.close(figure)
        return buffer.getvalue()

    plain = render_rgba()
    linestyle = render_rgba(ls='-')
    with_vlines = render_rgba(vlines_kwargs={'linestyles': 'dashdot'})

    assert_(plain != linestyle)
    assert_(with_vlines != plain)
    assert_(linestyle != with_vlines)
# artifacts in the plot.  this works better
x = [1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17]
# make the plot
plt.plot(x,np.asarray(lcs),linewidth=1.0)
plt.show()
plt.clf()
# there's a definite linear, increasing trend... 
# let's first try diffs
lcs_d1 = lcs.diff()
plt.plot(x,np.asarray(lcs_d1),linewidth=1.0)
plt.show()
# trend is gone
# we could have used the signal package like this (with same result):
# import scipy.signal as sig
# lcs_dt = sig.detrend(lcs)
# plt.plot(x,lcs_dt,linewidth=2.0)
# plt.show()

# now plot the ACF of the transformed series
# diff() leaves a NaN in the first position; left in place it turns every
# autocorrelation into NaN, so drop it before computing the correlograms
lcs_d1_valid = lcs_d1.dropna()

# draw into the figure created here; without ax= plot_acf opens a figure of
# its own and this one would be shown blank
# (assumes `st` is statsmodels.graphics.tsaplots -- TODO confirm)
acf_fig = plt.figure()
st.plot_acf(lcs_d1_valid, ax=acf_fig.add_subplot(111))
plt.show()
plt.clf()

# and PACF
pacf_fig = plt.figure()
st.plot_pacf(lcs_d1_valid, ax=pacf_fig.add_subplot(111))
plt.show()
plt.clf()

print("no autocorrelated structures")
Exemplo n.º 12
0
plt.title('一阶差分')
# plt.show()
check = sm.tsa.stattools.adfuller(stock_diff)  #平稳性检验
print(check)
'''
result : (-8.940749717155187, 9.267799032663645e-15, 0, 149, {'1%': -3.4750180242954167, '5%': -2.8811408028842043, '10%': -2.577221358046935}, 561.3312278939167)
1%、%5、%10不同程度拒绝原假设的统计值和ADF Test result的比较,ADF Test result同时小于1%、5%、10%即说明非常好地拒绝该假设;
P-value是否非常接近0

故通过平稳性检验;
'''

acf = plot_acf(stock_diff, lags=20)
plt.title('ACF')
# acf.show()

pacf = plot_pacf(stock_diff, lags=20)
plt.title('PACF')
# pacf.show()

model = ARIMA(stock_train, order=(1, 1, 1), freq='W-MON')
result = model.fit()
#print(result.summary)

pred = result.predict('20160829', '20181203', dynamic=True, typ='levels')
plt.figure(figsize=(6, 6))
plt.xticks(rotation=45)
plt.plot(pred)
plt.plot(stock_train)
plt.show()
Exemplo n.º 13
0
#subtract regression line from loan count data to account for non-stationary data
new_loan= []
item_count = 0
for item in loan_count_summary:
	 new_loan.append(item-est.params[0]-est.params[1]*item_count)
	 item_count +=1
loan_count_rev = pd.Series(new_loan, index = loan_count_summary.index)

#create variables to plot regression line 
x = loan_count_summary.index
y = est.params[0]+ est.params[1]*x


#plot regression line with loan data and detrended data
plt.plot(x, y, 'r-')
loan_count_rev.plot()
lcr = loan_count_summary.plot()
lcr.set_xlabel('Time (1 unit is a Month)')
lcr.set_ylabel('Loan Count')
lcr.legend(['OLS Regression Line', 'Loan Count Detrended (Y-Regression Line)', 
	'Loan Count Raw Data'])
plt.show()

#plot auto-correlation and partial auto-correlation
plot_acf(loan_count_rev)
plt.show()
plot_pacf(loan_count_rev)
plt.show()

import matplotlib.pyplot             as plt
import numpy                         as np
import pandas                        as pd
import statsmodels.graphics.tsaplots as tsaplots

df = pd.read_csv('LoanStats3b.csv', header=1, low_memory=False)

# converts string to datetime object in pandas:
df['issue_d_format'] = pd.to_datetime(df['issue_d'])
dfts = df.set_index('issue_d_format')
year_month_summary = dfts.groupby(lambda x : x.year * 100 + x.month).count()
loan_count_summary = year_month_summary['issue_d']

tsaplots.plot_acf(loan_count_summary)
tsaplots.plot_pacf(loan_count_summary)
plt.show()
Exemplo n.º 15
0
df = df['CO1 Comdty']
df = df.dropna()
prices = df.sort_index()

# Explore the data a little
prices.plot()

prices.mean()  # mean
prices.std()  # standard deviation
prices.std() / prices.mean()  # coefficient of variation
# the stock is very volatile, there are big jumps; the std. is rather large
# compared to the mean (high coefficient of variation)

tsaplots.plot_acf(prices, lags=36)  # just terrible
tsaplots.plot_pacf(prices, lags=36)  # very significant at lag=1
# this is a typical sign of an integrated process, so let's difference it
# (use returns instead of prices), which will hopefully yield a stationary process!

returns = prices.pct_change()
returns = returns[1:]  # drop NA
returns.isnull().sum()

returns.plot()

returns.mean()
returns.std()
returns.std() / returns.mean()

# verify stationarity
adfuller(returns, regression="c")
Exemplo n.º 16
0
## p-value가 0에 가까운 값이 출력된다.
## 안정적인 시계열 데이터가 되었다.
## k = 1 로 결정(1차 차분이 안정적이다.)

# ARIMA(p, k, q) => choose p and q
## Draw the diagnostics on a 2 x 3 grid of subplots.
figure, axes = plt.subplots(2, 3, figsize=(15, 7))

# The first difference starts with a NaN; drop it so the ACF/PACF below are
# computed on real values only.
first_diff = df.DEXKOUS.diff().dropna()

axes[0, 0].plot(df.DEXKOUS)
axes[1, 0].plot(df.DEXKOUS.diff())

axes[0, 0].set_title('original series')
axes[1, 0].set_title('1st difference series')

plot_acf(df.DEXKOUS, axes[0, 1])
plot_pacf(df.DEXKOUS, axes[0, 2])

plot_acf(first_diff, axes[1, 1])
plot_pacf(first_diff, axes[1, 2])

plt.tight_layout()
plt.show()

## AR 차수 : 3차 ~ 1차
## MA 차수 : 2차 ~ 0차

# ARIMA 예측 모델링
## ARIMA의 차수는 (3, 1, 2)
model = ARIMA(
    df.DEXKOUS, order=(3, 1, 2), freq='B'
)  # 환율 데이터는 토요일 일요일은 나오지 않으므로 제외한다는 의미 freq='B' / Business day만 설정한다는 의미
Exemplo n.º 17
0
import statsmodels.api as sm
import warnings


from statsmodels.tsa.stattools import acovf, acf,pacf,pacf_yw,pacf_ols
from statsmodels.graphics.tsaplots import plot_acf, plot_pacf

#Non stat

df1 = pd.read_csv('./airline_passengers.csv',index_col='Month',parse_dates=True)
df1.index.freq = 'MS'

#df2 = pd.read_csv('statmodel/Data/DailyTotalFemaleBirths.csv', index_col='Data',parse_dates=True)
#df2.index.freq = 'D'
df2 = pd.read_csv('statmodel/Data/DailyTotalFemaleBirths.csv',index_col='Date',parse_dates=True)
df2.index.freq = 'D'
#warnings.filterwarnings('ignore')
df1.plot()
plt.show()

#non-stationary
plot_acf(df1, lags=40)


# stationary, no seasonality here
plot_acf(df2,lags=40)
plt.show()


plot_pacf(df2,lags=40, title="Partial Auto Correlation")
plt.show()
Exemplo n.º 18
0
# In[246]:

import statsmodels.graphics.tsaplots as tsplots
tsplots.plot_acf(returns, lags= 20)
plt.show()


# In[247]:

returns[1:10]


# In[248]:

tsplots.plot_pacf(returns, lags= 20)
plt.show()


# In[249]:

# pandas moved the plotting helpers from pandas.tools.plotting to
# pandas.plotting and later removed the old location; try the current
# location first and fall back for old installations.
try:
    from pandas.plotting import bootstrap_plot
except ImportError:
    from pandas.tools.plotting import bootstrap_plot
bootstrap_plot(returns, size = 50)
plt.show()


# Find out the following facts about the data set
# - Total number of data points
# - Number of positive returns
# - Number of negative returns
# - Average annualized returns
Exemplo n.º 19
0
 def PlotPacf(self, dataset, title, indices=None, nlags=40):
     """Show the partial autocorrelation plot of ``dataset``.

     :param dataset: object exposing the series as ``.values``.
     :param title: text prefixed to 'PACF' to form the plot title.
     :param indices: unused.
     :param nlags: number of lags to plot.
     """
     # plot_pacf computes the partial autocorrelations itself, so it must be
     # given the raw series. Passing precomputed pacf() values made it plot
     # the PACF of the PACF, and ``nlags`` never reached the plot.
     plot_pacf(dataset.values, lags=nlags, title=title + 'PACF')
     plt.show()
from pandas import read_csv
from statsmodels.graphics.tsaplots import plot_acf
from statsmodels.graphics.tsaplots import plot_pacf
# ACF  -> suggests the MA order (q)
# PACF -> suggests the AR order (p)
from matplotlib import pyplot as pp

thisdir = os.getcwd()
print(thisdir)
series = read_csv(r'data\stationary.csv', header=None, index_col=0, parse_dates=True, squeeze=True)

pp.figure()
pp.subplot(211)
plot_acf(series, ax=pp.gca())
pp.subplot(212)
plot_pacf(series, ax=pp.gca())
pp.tight_layout(pad=3.0)
pp.show()
#%% Testen ARIMA
#   Uit bovenstaande ACF en PACF

from pandas import read_csv
from sklearn.metrics import mean_squared_error
from statsmodels.tsa.arima_model import ARIMA
from math import sqrt
 
# create a differenced series
def difference(dataset, interval=1):
	diff = list()
	for i in range(interval, len(dataset)):
		value = dataset[i] - dataset[i - interval]
Exemplo n.º 21
0
'''
Created on Dec 11, 2018

@author: snake91
'''

import numpy as np
from statsmodels.graphics.tsaplots import plot_acf, plot_pacf
import matplotlib.pyplot as plt

x = np.random.normal(size=500)
window = 10

y = [np.mean(x[i - window:i]) for i in range(window, len(x))]

plt.plot(x[window:len(x)])
plt.plot(y)

plot_acf(x, lags=10)
plot_pacf(x, lags=10)
plot_acf(y, lags=10)
plot_pacf(y, lags=10)
Exemplo n.º 22
0
# stationory
stationoryResult = adfuller(targetData)
# print 'adf: ', stationoryResult[0]
# print 'p-value: ', stationoryResult[1]
# print 'Critical values: ', stationoryResult[4]
# if stationoryResult[0]> stationoryResult[4]['5%']:
#     print 'Time Series is nonstationary'
# else:
#     print 'Time Series is stationary'

# acf, pacf
f2 = plt.figure(facecolor='white')
ax1 = f2.add_subplot(211)
plot_acf(targetData, lags=40, ax=ax1)
ax2 = f2.add_subplot(212)
plot_pacf(targetData, lags=40, ax=ax2)
# plt.show()

# according to the problem, only use AR(p) p belongs to (30,35)

comm = MPI.COMM_WORLD
rank = comm.Get_rank()

AR_mod = lambda x: ARMA(targetData, (x, 0)).fit(disp=0, method='mle')
aic_list = AR_mod(rank).aic
print "%i|%i|%i" % (rank, aic_list, rank)
'''
# confirm the lag
p_index = aic_list.index(min(aic_list))
AR = AR_mod(p_index)
Exemplo n.º 23
0
def check_acf(x):
    """Draw the ACF, the PACF (25 lags each) and the raw values of ``x`` on
    three stacked axes."""
    fig, ax = plt.subplots(3, figsize=(12, 6))
    # plot_acf/plot_pacf draw onto the axes they are given and return the
    # Figure, so their result must not be stored back into the axes array.
    plot_acf(x, ax=ax[0], lags=25)
    plot_pacf(x, ax=ax[1], lags=25)
    ax[2].plot(x)
ϕ_2 = 0.3 (again, reverse the signs)
Plot the PACF for simulated_data_2 using the plot_pacf function
'''
# Import the modules for simulating data and for plotting the PACF
from statsmodels.tsa.arima_process import ArmaProcess
from statsmodels.graphics.tsaplots import plot_pacf

# Simulate AR(1) with phi=+0.6
ma = np.array([1])
ar = np.array([1, -0.6])
AR_object = ArmaProcess(ar, ma)
simulated_data_1 = AR_object.generate_sample(nsample=5000)

# Plot PACF for AR(1)
plot_pacf(simulated_data_1, lags=20)
plt.show()

# Simulate AR(2) with phi1=+0.6, phi2=+0.3
ma = np.array([1])
ar = np.array([1, -0.6, -0.3])
AR_object = ArmaProcess(ar, ma)
simulated_data_2 = AR_object.generate_sample(nsample=5000)

# Plot PACF for AR(2)
plot_pacf(simulated_data_2, lags=20)
plt.show()
Exemplo n.º 25
0
# Legend and Labels

# Raw strings: the TeX commands contain backslashes (\mu, \sim, \sigma, \hat)
# that are not valid Python escape sequences in a normal string literal.
plt.legend([
    r"Normal Dist. Fit ($\mu \sim${0}, $\sigma=${1:.2f})".format(0, sigma),
    r'$\hat{e}_t$'
])
plt.xlabel('Value')
plt.ylabel('Frequency')

#%%

from statsmodels.graphics.tsaplots import plot_pacf

ax = plt.subplot(gs[2])
plot_pacf(et_hat_series, lags=50, alpha=0.01, ax=ax)
plt.title('')
plt.xlabel('Lags')
plt.ylabel('PACF')

#%%
from statsmodels.tsa.ar_model import AR

resultGetVectorAR = GetVectorAR(et_hat[None, :], maxlags=1, trend='c')
resultGetAR = AR(et_hat).fit(maxlag=3, trend='c', method='cmle')
print('Is AR({%d}) model stable: {%s}' %
      (resultGetAR.k_ar, str(IsStable(resultGetAR.roots))))
print(
    'Is VectorAR({%s}) model stable: {%s}' %
    (resultGetVectorAR['maxlags'], str(IsStable(resultGetVectorAR['roots']))))
print('NOTE THAT VECTOR_AR[1] IS *NOT* STABLE')
Exemplo n.º 26
0
year_month_summary = dfts.groupby(lambda x: x.year * 100 + x.month).count()
loan_count_summary = year_month_summary['issue_d']

print(loan_count_summary)
#we're left with a data table of year+month x # of loans issued
plt.xlabel('2015 Issue Date (Month)')
plt.ylabel('Loans Issued')
loan_count_summary.plot()
plt.show()

#ACF
sag.plot_acf(loan_count_summary)
plt.show()

#PACF
sag.plot_pacf(loan_count_summary)
plt.show()

print(
    "There are autocorrelated structures in the data, specfically there seems to be Seasonality and a need to add an Auto Regressive term."
)

#output
#201501    2616
#201502    2588
#201503    3002
#201504    3067
#201505    3167
#201506    3494
#201507    3694
#201508    3729
def _plot_PACF(x):
    """Draw the partial autocorrelation plot of ``x`` with ``plot_pacf``'s
    default settings. The figure is not shown here."""
    # Imported inside the function, so statsmodels is only loaded on first call.
    from statsmodels.graphics.tsaplots import plot_pacf
    plot_pacf(x)
Exemplo n.º 28
0
## Using Returns

df['returns'] = df.market_value.pct_change(1).mul(100)
df = df.iloc[1:]

sts.adfuller(df.returns)

## ACF and PACF for Returns
## ACF and PACF for Returns

sgt.plot_acf(df.returns, lags=40, zero = False)
plt.title("ACF FTSE Returns", size=24)
plt.show()

sgt.plot_pacf(df.returns, lags = 40, zero = False, method = ('ols'))
plt.title("PACF FTSE Returns", size=24)
plt.show()

## AR(1) for Returns
## AR(1) for Returns

model_ret_ar_1 = ARMA(df.returns, order = (1,0))

results_ret_ar_1 = model_ret_ar_1.fit()

results_ret_ar_1.summary()

## Higher-Lag AR Models for Returns
## Higher-Lag AR Models for Returns

# ## `for` loop for creating ACF and PACF plots

# In[19]:


from statsmodels.graphics.tsaplots import plot_acf
from statsmodels.graphics.tsaplots import plot_pacf

for i in range(0,len(Warehouse)):
    plot_acf(diff_warehouse(Warehouse[i]).Order_Demand)
    print '\n\n\n\n___________________________________________________________________________________________________________________________'
    print color.BOLD  + '\n\n\t\t\t\t\t\t\t %s \n'% Warehouse[i] + color.END
    plt.show()
    plot_pacf(diff_warehouse(Warehouse[i]).Order_Demand)
    plt.show()


# ## Method 2 - Auto Arima

# In[20]:


from pyramid.arima import auto_arima
import plotly.plotly as py
import plotly.graph_objs as go
from plotly.offline import download_plotlyjs, init_notebook_mode,  plot

for i in range(0,len(Warehouse)):
    train = diff_warehouse(Warehouse[i]).iloc[0:int(len(diff_warehouse(Warehouse[i]))*0.7)]
Exemplo n.º 30
0
kings_ma3_res = kings_ma3_res.dropna()
kings_ma3_res.head()

#Plotting histogram for residuals
plt.hist(kings_ma3_res)
plt.title('Histogram Residuals @MA3')

''' if we r getting symmetrical hist 
    then its good model that we did!
'''

#Plotting acf & pacf 
from statsmodels.graphics.tsaplots import plot_acf
from statsmodels.graphics.tsaplots import plot_pacf
plot_acf(kings_ma3_res, lags=20) 
plot_pacf(kings_ma3_res, lags=19)

#Squaring residuals/ errors
kings_ma3_se = pow(kings_ma3_res,2)
kings_ma3_se.head()

#average/mean of squared residuals/ errors
kings_ma3_mse = (kings_ma3_se.sum())/len(kings_ma3_se)
print(kings_ma3_mse) #128.7527777777778

#Root of average/mean of squared residuals/ errors
kings_ma3_rmse = sqrt(kings_ma3_mse) 
print(kings_ma3_rmse) #11.346928120763689

#Another method to find RMSE
kings_ma3 = kings.rolling(window=3).mean()
Exemplo n.º 31
0
## 1. ARIMA ##

import pandas as pd
import matplotlib.pyplot as plt
from statsmodels.graphics.tsaplots import plot_acf, plot_pacf
from statsmodels.tsa.arima_model import ARIMA

series = pd.read_csv('C:/Users/Shinhyunjin/Dropbox/market-price.csv',
                     header=0,
                     index_col=0,
                     squeeze=True)
series.plot()

plot_acf(series)
plot_pacf(series)
# Call show(); the bare attribute reference `plt.show` did nothing.
plt.show()

#1차차분
diff_1 = series.diff(periods=1).iloc[1:]
diff_1.plot()
plot_acf(diff_1)
plot_pacf(diff_1)
plt.show()

# Modeling

model = ARIMA(series, order=(0, 1, 1))  #ARIMA(0,1,1)
model_fit = model.fit(trend='c', full_output=True, disp=1)
print(model_fit.summary())
Exemplo n.º 32
0
# ACF and PACF plots of time series
from pandas import read_csv
from statsmodels.graphics.tsaplots import plot_acf
from statsmodels.graphics.tsaplots import plot_pacf
from matplotlib import pyplot
# read_csv's ``squeeze=True`` option was removed in pandas 2.0; squeezing the
# column axis of the parsed frame is the documented replacement and returns a
# Series when the file has a single data column.
series = read_csv('dataset.csv',
                  header=None,
                  index_col=0,
                  parse_dates=True).squeeze('columns')
pyplot.figure()
# ACF in the upper panel, PACF in the lower one.
pyplot.subplot(211)
plot_acf(series, ax=pyplot.gca())
pyplot.subplot(212)
plot_pacf(series, ax=pyplot.gca())
pyplot.show()
Exemplo n.º 33
0
from statsmodels.graphics.tsaplots import plot_acf
plot_acf(data).show()

#平稳性检测
from statsmodels.tsa.stattools import adfuller as ADF
print(u'原始序列的ADF检验结果为:', ADF(data[u'销量']))
#返回值依次为adf、pvalue、usedlag、nobs、critical values、icbest、regresults、resstore

#差分后的结果
D_data = data.diff().dropna()
D_data.columns = [u'销量差分']
D_data.plot()  #时序图
plt.show()
plot_acf(D_data).show()  #自相关图
from statsmodels.graphics.tsaplots import plot_pacf
plot_pacf(D_data).show()  #偏自相关图
print(u'差分序列的ADF检验结果为:', ADF(D_data[u'销量差分']))  #平稳性检测

#白噪声检验
from statsmodels.stats.diagnostic import acorr_ljungbox
print(u'差分序列的白噪声检验结果为:', acorr_ljungbox(D_data, lags=1))  #返回统计量和p值

from statsmodels.tsa.arima_model import ARIMA

data[u'销量'] = data[u'销量'].astype(float)
#定阶
pmax = int(len(D_data) / 10)  #一般阶数不超过length/10
qmax = int(len(D_data) / 10)  #一般阶数不超过length/10
bic_matrix = []  #bic矩阵
for p in range(pmax + 1):
    tmp = []
def programmer_6():
    """
    Run the ARIMA order-selection walkthrough on ``data/arima_data.xls``.

    Notes on warnings seen while running this:
    # UserWarning: matplotlib is currently using a non-GUI backend, so cannot show the figure
      "matplotlib is currently using a non-GUI backend, "
    Cause: plt.show() was called several times.
    Fix: use plt.subplot().

    # RuntimeWarning: overflow encountered in exp
    Cause: insufficient numeric precision.

    forecastnum --> number of days to forecast
    plot_acf().show() --> autocorrelation plot
    plot_pacf().show() --> partial autocorrelation plot
    """
    discfile = 'data/arima_data.xls'
    forecastnum = 5
    data = pd.read_excel(discfile, index_col=u'日期')

    fig = plt.figure(figsize=(8, 6))
    # First autocorrelation plot (original series)
    ax1 = plt.subplot(411)
    fig = plot_acf(data, ax=ax1)

    # Stationarity (ADF) test on the original series
    print(u'原始序列的ADF检验结果为:', ADF(data[u'销量']))
    # Return values, in order: adf, pvalue, usedlag, nobs, critical values, icbest, regresults, resstore

    # First difference of the series
    D_data = data.diff().dropna()
    D_data.columns = [u'销量差分']
    # Time-series plot of the differenced data
    D_data.plot()
    plt.show()
    # Second autocorrelation plot (differenced series)
    fig = plt.figure(figsize=(8, 6))
    ax2 = plt.subplot(412)
    fig = plot_acf(D_data, ax=ax2)
    # Partial autocorrelation plot
    ax3 = plt.subplot(414)
    fig = plot_pacf(D_data, ax=ax3)
    plt.show()
    fig.clf()

    print(u'差分序列的ADF检验结果为:', ADF(D_data[u'销量差分']))  # stationarity test

    # White-noise (Ljung-Box) test
    print(u'差分序列的白噪声检验结果为:', acorr_ljungbox(D_data, lags=1))  # returns the statistic and the p-value
    data[u'销量'] = data[u'销量'].astype(float)
    # Order selection
    pmax = int(len(D_data) / 10)  # the order usually does not exceed length/10
    qmax = int(len(D_data) / 10)  # the order usually does not exceed length/10
    bic_matrix = []  # BIC matrix
    data.dropna(inplace=True)

    # Some (p, q) combinations fail to fit, so try/except skips them.
    # Warnings are escalated to errors so that fits which only warn are
    # skipped as well.
    import warnings
    # catch_warnings restores the previous filters on exit, so the 'error'
    # filter no longer leaks into the rest of the process.
    with warnings.catch_warnings():
        warnings.filterwarnings('error')
        for p in range(pmax + 1):
            tmp = []
            for q in range(qmax + 1):
                try:
                    tmp.append(ARIMA(data, (p, 1, q)).fit().bic)
                except Exception:
                    # Exception covers fitting errors and escalated warnings
                    # without also swallowing KeyboardInterrupt/SystemExit.
                    tmp.append(None)
            bic_matrix.append(tmp)
        # The minimum can be read off this matrix
        bic_matrix = pd.DataFrame(bic_matrix)
        # Flatten with stack, then use idxmin to locate the minimum.
        p, q = bic_matrix.stack().idxmin()
        print(u'BIC最小的p值和q值为:%s、%s' % (p, q))
        model = ARIMA(data, (p, 1, q)).fit()  # fit ARIMA(p, 1, q); the original note says ARIMA(0, 1, 1)
        model.summary2()  # produce a model report
        model.forecast(forecastnum)  # 5-day forecast: returns predictions, standard errors and confidence intervals
Exemplo n.º 35
0
def display_pacf(series, lags = 50):
    """Plot the partial autocorrelation of ``series`` for ``lags`` lags and
    show the figure."""
    # Imported inside the function, as in the original.
    import matplotlib.pyplot as plt
    from statsmodels.graphics import tsaplots
    tsaplots.plot_pacf(series, lags=lags)
    plt.show()
Exemplo n.º 36
0
    
    #Perform Dickey-Fuller test:
    print 'Results of Dickey-Fuller Test:'
    dftest = adfuller(timeseries, autolag='AIC')
    dfoutput = pd.Series(dftest[0:4], index=['Test Statistic','p-value','#Lags Used','Number of Observations Used'])
    for key,value in dftest[4].items():
        dfoutput['Critical Value (%s)'%key] = value
    print dfoutput 

from statsmodels.graphics.tsaplots import plot_acf, plot_pacf

# regular diff
diff0 = df.stack().diff(periods=4)[4:]
diff0.plot(title='European Retail Trade Differenced')
plot_acf(diff0, lags=30)
plot_pacf(diff0, lags=30)

test_stationarity(diff0)

# additional diff
diff1 = diff0.diff()[1:]
diff1.plot(title='European Retail Trade Differenced Twice')
plot_acf(diff1, lags=30)
plot_pacf(diff1, lags=30)

test_stationarity(diff1)

import statsmodels.api as sm
data = df.stack().values
model = sm.tsa.statespace.SARIMAX(data, order=(0,1,1), seasonal_order=(0,1,1,4))
results = model.fit()
#自相关,和偏相关图不是截尾,也不是拖尾.p > 0.05.
df.index = pd.to_datetime(df.index,format='%Y')     #to_datetime
df.plot()
plt.show()

#0 hypo: random walk with drift.
result = adfuller(df['tavg'])
print('adf test:',result[1])

#p>0.5,非平稳.一阶差分.
chg = df.diff()
chg = chg.dropna()
fix,axes = plt.subplots(2,1)
plot_acf(chg,lags=20,ax=axes[0])
plot_pacf(chg,lags=20,ax=axes[1])
plt.show()

#AR(1).
mod_ar1 = ARMA(chg,order=(1,0))
res_ar1 = mod_ar1.fit()
print('AIC of AR(1):',res_ar1.aic)
#AR(2).
mod_ar2 = ARMA(chg,order=(2,0))
res_ar2 = mod_ar2.fit()
print('AIC of AR(2):',res_ar2.aic)
#ARMA(1,1).
mod_arma11 = ARMA(chg,order=(1,1))
res_arma11 = mod_arma11.fit()
print('AIC of ARMA(1,1):',res_arma11.aic)
X = series.values
X = X.astype('float32')
train_size = int(len(X) * 0.50)
train, test = X[0:train_size], X[train_size:]
# walk-forward validation
history = [x for x in train]
predictions = list()
for i in range(len(test)):
    # difference data
    months_in_year = 12
    diff = difference(history, months_in_year)
    # predict
    model = ARIMA(diff, order=(0, 0, 1))
    model_fit = model.fit(trend='nc', disp=0)
    yhat = model_fit.forecast()[0]
    yhat = inverse_difference(history, yhat, months_in_year)
    predictions.append(yhat)
    # observation
    obs = test[i]
    history.append(obs)
# errors
residuals = [test[i] - predictions[i] for i in range(len(test))]
residuals = DataFrame(residuals)
print(residuals.describe())
# plot
pyplot.figure()
pyplot.subplot(211)
plot_acf(residuals, ax=pyplot.gca())
pyplot.subplot(212)
plot_pacf(residuals, ax=pyplot.gca())
pyplot.show()
Exemplo n.º 39
0
		autocorrelation_plot(y, ax=plt.subplot(222), color='k')
		plt.xlabel(u'Шаг')
		plt.ylabel(u'АКФ')
		plt.title('')
	
	# Spectrum plot
	plt.subplot(223)
	plt.plot(freq, np.log(spectrum), color='k')
	plt.xlabel(u"Частота")
	plt.ylabel(u"Амплитуда (log)")
	#plt.title("Timeseries spectrum")
	plt.grid(True)

	if f_show_PACF:
		from statsmodels.graphics.tsaplots import plot_pacf
		plot_pacf(y, ax=plt.subplot(224), lags=pacf_lags)
		plt.xlabel(u'Шаг')
		plt.ylabel(u'ЧАКФ')
		plt.title('')

	else:
		# Power spectrum plot
		plt.subplot(224)
		plt.plot(freq, power_spectrum, color='k')
		plt.xlabel(u"Частота")
		plt.ylabel(u"Мощность")
		#plt.title("Power spectrum")
	plt.grid(True)

	plt.subplots_adjust(hspace=0.4, wspace=0.4)
Exemplo n.º 40
0
from statsmodels.graphics.tsaplots import plot_acf
plot_acf(data).show()

#平稳性检测
from statsmodels.tsa.stattools import adfuller as ADF
print( ADF(data[u'销量']))
#返回值依次为adf、pvalue、usedlag、nobs、critical values、icbest、regresults、resstore

#差分后的结果
D_data = data.diff().dropna()
D_data.columns = [u'销量差分']
D_data.plot() #时序图
plt.show()
plot_acf(D_data).show() #自相关图
from statsmodels.graphics.tsaplots import plot_pacf
plot_pacf(D_data).show() #偏自相关图
ADF(D_data[u'销量差分'])#平稳性检测

#白噪声检验
from statsmodels.stats.diagnostic import acorr_ljungbox
acorr_ljungbox(D_data, lags=1) #返回统计量和p值

from statsmodels.tsa.arima_model import ARIMA

#定阶
pmax = int(len(D_data)/10) #一般阶数不超过length/10
qmax = int(len(D_data)/10) #一般阶数不超过length/10
bic_matrix = [] #bic矩阵
for p in range(pmax+1):
  tmp = []
  for q in range(qmax+1):
Exemplo n.º 41
0
# We will perform statistical tests like KPSS and ADF to confirm our understanding.
#
# But first, let's plot ACF and PACF graphs.

# In[29]:

acf = plot_acf(series, lags=50, alpha=0.05)
plt.title("ACF for Weighted Price", size=20)
plt.show()

# The above graph shows that the effect barely deteriorates over time, so past values affect the present ones. The more lags we include, the better our model will fit the dataset; the risk is that the coefficients might fit the dataset too well and cause overfitting.
# In our model, we always try to include only those lags which have a direct effect on our present value. Hence, let's try PACF.

# In[30]:

plot_pacf(series, lags=50, alpha=0.05, method='ols')
plt.title("PACF for Weighted Price", size=20)
plt.show()

# Coefficients values for lag>5 are statistically not significant and their impact on the model is minimal, except a few spikes at 8,11,22 and beyond.

# <a id="subsection-four"></a>
# # KPSS Test
#
# The KPSS test, short for, Kwiatkowski-Phillips-Schmidt-Shin (KPSS), is a type of Unit root test that tests for the stationarity of a given series around a deterministic trend.
#
# Here, the null hypothesis is that the series is **stationary**.
#
# That is, if p-value is < signif level (say 0.05), then the series is non-stationary and vice versa.

# In[31]:
Exemplo n.º 42
0
import pandas as pd
from matplotlib import pyplot as plt
from statsmodels.graphics.tsaplots import plot_acf, plot_pacf

# Name of the CSV column whose autocorrelation structure is plotted.
header = "Sodium..mg."

# Load the table with the first column as a parsed date index.
# `squeeze=True` was dropped here: the keyword no longer exists in
# pandas >= 2.0, and the code below needs a DataFrame to select a column from.
series = pd.read_csv('paleo.csv',
                     header=0,
                     index_col=0,
                     parse_dates=True)

# Select the column by its label. The previous `series[{header}]` indexed
# with a *set* literal (leftover template braces), which pandas rejects.
sodium = series[header]

# Create figure: ACF on the top axes, PACF on the bottom axes.
fig, (ax1, ax2) = plt.subplots(2, 1, figsize=(8, 8))

# Make ACF plot (zero=False omits the trivial lag-0 bar).
plot_acf(sodium, lags=10, zero=False, ax=ax1)

# Make PACF plot
plot_pacf(sodium, lags=10, zero=False, ax=ax2)

plt.show()
# ## Partial Autocorrelation
#
# In general, a partial correlation is a conditional correlation.
#
# It is the correlation between two variables under the assumption that we know and take into account the values of some other set of variables.
#
# For instance, consider a regression context in which y = response variable and x1, x2, and x3 are predictor variables.  The partial correlation between y and x3 is the correlation between the variables determined taking into account how both y and x3 are related to x1 and x2.
#
# Formally, this relationship is defined as:
#
# ## $\frac{\text{Covariance}(y, x_3|x_1, x_2)}{\sqrt{\text{Variance}(y|x_1, x_2)\text{Variance}(x_3| x_1, x_2)}}$
#
# Check out this [link](http://www.itl.nist.gov/div898/handbook/pmc/section4/pmc4463.htm) for full details on this.
# We can then plot this relationship:
# In[36]:
result = plot_pacf(df["Seasonal First Difference"].dropna())
# ### Interpretation
#
# Typically a sharp drop after lag "k" suggests an AR-k model should be used. If there is a gradual decline, it suggests an MA model.
# ### Final Thoughts on Autocorrelation and Partial Autocorrelation
#
# * Identification of an AR model is often best done with the PACF.
#     * For an AR model, the theoretical PACF “shuts off” past the order of the model.  The phrase “shuts off” means that in theory the partial autocorrelations are equal to 0 beyond that point.  Put another way, the number of non-zero partial autocorrelations gives the order of the AR model.  By the “order of the model” we mean the most extreme lag of x that is used as a predictor.
#
#
# * Identification of an MA model is often best done with the ACF rather than the PACF.
#     * For an MA model, the theoretical PACF does not shut off, but instead tapers toward 0 in some manner.  A clearer pattern for an MA model is in the ACF.  The ACF will have non-zero autocorrelations only at lags involved in the model.
# _____
# ### Final ACF and PACF Plots
#
# We've run quite a few plots, so let's just quickly get our "final" ACF and PACF plots. These are the ones we will be referencing in the rest of the notebook below.
# Calculate and print the root mean squared error of the random-walk forecast.
# NOTE(review): Y_test[1:] presumably drops the first test point so the lengths
# match Y_predict_RandomWalk - confirm against where the prediction is built.
# The value is scaled by 100 and printed with a "%" suffix, which assumes the
# series is expressed as a fraction - TODO confirm.
rmse = np.sqrt(mean_squared_error(Y_test[1:], Y_predict_RandomWalk)) * 100
print(rf"The RMSE is {rmse:2.4f}%")

# # ARIMA

# ### Identifying hyperparameters

# In[96]:

# Plot the ACF and PACF to identify AR and MA lags (as per Box-Jenkins identification).
# Each call opens its own figure; the single plt.show() below displays both.
from statsmodels.graphics.tsaplots import plot_acf
from statsmodels.graphics.tsaplots import plot_pacf
plot_acf(Y_train, lags=100)
plot_pacf(Y_train, lags=100)
plt.show()

# For MA(q), only the first q autocorrelations are nonzero, so the ACF should cut off after lag q.
# For AR(p), the autocorrelations may decline gradually, but the PACF should cut off after lag p.

# ### Building and training the model

# In[97]:

# Mark the start time of model training.
start_time = time.time()

# Train the ARIMA model.
# NOTE(review): order=(0, 0, 0) requests no AR terms, no differencing and no
# MA terms - confirm this is the order intended after the plots above.
ARIMA_model = ARIMA(endog=Y_train, order=(0, 0, 0)).fit()
Exemplo n.º 45
0
# Plot the differenced series on a single full-size axes.
fig = plt.figure(figsize=(12, 8))
ax2 = fig.add_subplot(111)
# diff(2) subtracts the value two rows earlier (a lag-2 difference); it is
# not the same as differencing twice.
ts2 = ts.diff(2)
ts2.plot(ax=ax2)
# NOTE(review): this appears to draw ts2 a second time on the same (current)
# axes - confirm whether the duplicate line is intended.
plt.plot(ts2)
plt.show()

# Suitable q and p values
# Choose a suitable ARIMA model, i.e. choose the p and q values of the ARIMA model.

from statsmodels.graphics.tsaplots import plot_acf, plot_pacf
# ACF on the top panel, PACF on the bottom panel. Both are computed on the
# original `ts`, not on the differenced `ts2` built above.
f = plt.figure(facecolor='white')
ax1 = f.add_subplot(211)
plot_acf(ts, lags=40, ax=ax1)
ax2 = f.add_subplot(212)
plot_pacf(ts, lags=40, ax=ax2)
plt.show()

# Forecast results
import statsmodels.api as sm

# Fit an ARMA model on `ts` with order (10, 3) and print its AIC, BIC and HQIC.
arma_mod20 = sm.tsa.ARMA(ts, (10, 3)).fit()
print(arma_mod20.aic, arma_mod20.bic, arma_mod20.hqic)

# Predict over the given timestamp window; dynamic=True is passed so the
# prediction is made dynamically from the start timestamp onward.
pre = arma_mod20.predict('2018-08-16 17:50:00',
                         '2018-08-16 18:10:00',
                         dynamic=True)
print(pre)

fig, ax = plt.subplots(figsize=(12, 8))
def f_autocorr(pmi):
    """Draw the ACF and PACF of *pmi* and return both plotting results.

    Args:
        pmi: series handed unchanged to ``plot_acf`` and ``plot_pacf``.

    Returns:
        A 2-tuple ``(acf_result, pacf_result)`` holding whatever the two
        plotting calls return, ACF first.
    """
    # The ACF is drawn before the PACF so the figures are created in the
    # same order as before.
    acf_result = plot_acf(pmi)
    return acf_result, plot_pacf(pmi)
# Second difference: the first-difference column minus itself shifted by one row.
dataset['#Passengers Second Diff'] = dataset['#Passengers First Diff'] - dataset['#Passengers First Diff'].shift(1)
# Bare expression: only displays the frame in a notebook; no effect in a script.
dataset

# Stationarity check on the second difference (NaN rows from the shift removed).
adf_check(dataset['#Passengers Second Diff'].dropna())

# Seasonal difference: each value minus the value 12 rows earlier.
dataset['Seasonal Difference'] = dataset['#Passengers']-dataset['#Passengers'].shift(12)
dataset

adf_check(dataset['Seasonal Difference'].dropna())

from statsmodels.graphics.tsaplots import plot_acf, plot_pacf

# ACF of the second difference, read by the author as q = 0.
plot_acf(dataset['#Passengers Second Diff'].dropna(), lags=28)
#q=0

# PACF of the second difference, read by the author as p = 0.
plot_pacf(dataset['#Passengers Second Diff'].dropna(),lags=14)
#p=0

# ACF and PACF of the seasonal difference, read by the author as seasonal P = 2.
plot_acf(dataset['Seasonal Difference'].dropna(), lags=12)

plot_pacf(dataset['Seasonal Difference'].dropna(), lags=12)
#P=2

# NOTE(review): ARIMA is imported but not used in the code visible here.
from statsmodels.tsa.arima_model import ARIMA
import statsmodels.api as sm

# Fit a seasonal ARIMA on the raw passenger counts with a 12-row season.
# NOTE(review): order=(1,2,1) does not match the p=0 / q=0 annotations above -
# confirm which is intended.
model = sm.tsa.statespace.SARIMAX(dataset['#Passengers'], order=(1,2,1), seasonal_order=(2,2,0,12))
results = model.fit()
print(results.summary())

# Store the dynamic prediction for positions 130 through 144 in a new column.
dataset['Forecast'] = results.predict(start=130, end=144, dynamic=True)
Exemplo n.º 48
0
# Group rows by a YYYYMM integer key derived from each index label and count
# the non-null entries per column.
# (assumes the index of dfts is datetime-like - TODO confirm)
year_month_summary = dfts.groupby(lambda x : x.year * 100 + x.month).count()
# Keep only the per-month count of the 'issue_d' column.
loan_count_summary = year_month_summary['issue_d']

print(loan_count_summary)
# We are left with a table of year+month keys against the number of loans issued.
# The axis labels are set through pyplot before the series is plotted.
plt.xlabel('2015 Issue Date (Month)')
plt.ylabel('Loans Issued')
loan_count_summary.plot()
plt.show()

# ACF of the monthly loan counts
sag.plot_acf(loan_count_summary)
plt.show()

# PACF of the monthly loan counts
sag.plot_pacf(loan_count_summary)
plt.show()

print ("There are autocorrelated structures in the data, specfically there seems to be Seasonality and a need to add an Auto Regressive term.")

#output
#201501    2616
#201502    2588
#201503    3002
#201504    3067
#201505    3167
#201506    3494
#201507    3694
#201508    3729
#201509    3873
#201510    4181
Exemplo n.º 49
0
def _plot_correlogram(plot_func, series, ax, title):
    """Draw one ACF/PACF panel on *ax* with the shared lags, grid and labels."""
    plot_func(series, lags=50, alpha=0.05, title=title, ax=ax)
    setgrid(ax, minor=True)
    ax.set_xlabel("lag amount")
    ax.set_ylabel("Correlation")


def timeseries_exploratory_plots(x, title="Plots", raw=True, change=True, change_type="adj",
                                 lag=1, autocorr=True, autocorr_changes=True,
                                 density=True, density_resolution=0.1,
                                 density_change=True, density_change_resolution=0.1,
                                 pacf=True, pacf_change=True,
                                 figsize=(10,20)):
    """ Given a pandas series, plot up to eight panels below each other.

        Enabled panels are drawn in this order:
        - the raw data
        - the changes
        - the density of the raw data
        - the density of the changes
        - the autocorrelation of the raw data
        - the autocorrelation of the changes
        - the partial autocorrelation of the raw data
        - the partial autocorrelation of the changes

        NOTE: that currently it REQUIRES the data to NOT have any missing/NAN
        values, otherwise it will plot things incorrectly, so make sure data
        being fed in is cleaned up.

    Args:
        x:      the series to explore.
        title:  (str) title placed above the whole figure.
        raw:    (bool) plot the raw data?
        change: (bool) plot the changes?
        change_type:    (str)
            The type of changes to use. One of:
            - "diff" for raw differences, via pandas `Series.diff(lag)`
            - "pct"  for percentage changes, via pandas `Series.pct_change(lag)`
            - "adj"  to use my adjusted percentage change function
        lag:    (int)(default = 1)
            How many timesteps to offset the change/diff by.
        autocorr:   (bool) create autocorrelation plot of the raw data?
        autocorr_changes:   (bool) create autocorrelation plot of the changes?
        density:    (bool) create plot of density?
        density_resolution: (float)(default=0.1)
            Resolution of the density estimation.

        density_change:    (bool) create plot of density for changes?
        density_change_resolution: (float)(default=0.1)
            Resolution of the density estimation.

        pacf: (bool) create partial auto correlation function plot on the raw data?
        pacf_change: (bool) create partial auto correlation function plot on the changes data?
        figsize: (tuple) size of the whole figure, passed to `plt.subplots`.

    Returns:
        The matplotlib figure holding all enabled panels.

    Raises:
        ValueError: if every panel is disabled, or if a changes-based panel
            is requested with an unknown `change_type`.
    """
    nplots = sum([raw, change, autocorr, autocorr_changes, density, density_change, pacf, pacf_change])
    if nplots == 0:
        raise ValueError("at least one plot must be enabled")

    # Every changes-based panel needs the change series, not only the line
    # plot and the ACF; otherwise enabling just density_change or
    # pacf_change hits an undefined name.
    needs_changes = change or autocorr_changes or density_change or pacf_change
    if needs_changes and change_type not in ("adj", "pct", "diff"):
        raise ValueError(
            "change_type must be one of 'adj', 'pct' or 'diff', got %r" % (change_type,))

    # squeeze=False always yields a 2-D array of axes, so flatten() also
    # works when a single panel is requested.
    fig, axes = plt.subplots(nplots, 1, figsize=figsize, squeeze=False)
    panels = iter(axes.flatten())
    fig.suptitle(title, fontsize=15)

    if needs_changes:
        if change_type == "adj":
            percent_changes = adjusted_percent_change(x, lag=lag, epsilon=0.1)
        elif change_type == "pct":
            percent_changes = pd.Series(x).pct_change(lag)
        else:  # "diff"
            percent_changes = pd.Series(x).diff(lag)

    if autocorr_changes or pacf_change:
        # Drop the first element and back-fill the remaining gaps so both
        # correlograms of the changes get the same NaN-free input.
        clean_changes = pd.Series(percent_changes).iloc[1:].bfill()

    if raw:
        plot_lines([x], axtitle="raw data", xlabel="x", ylabel="raw value",
                   ax=next(panels), show=False, minorgrid=True)

    if change:
        # NOTE(review): unlike the raw-data call above, this one does not pass
        # show=False - confirm plot_lines' default does not display early.
        plot_lines([percent_changes], axtitle="Percent Change",
                   color_offset=1, ax=next(panels), xlabel="x",
                   ylabel="percent change", minorgrid=True)

    if density:
        plot_densities([x], ax=next(panels), axtitle="Distribution of raw data",
                       resolution=density_resolution, minorgrid=True)

    if density_change:
        plot_densities([percent_changes], ax=next(panels), axtitle="Distribution of changes",
                       resolution=density_change_resolution, minorgrid=True)

    # Autocorrelation function plot
    if autocorr:
        _plot_correlogram(plot_acf, x, next(panels), "Autocorrelation plot")

    # Autocorrelation function plot of the changes
    if autocorr_changes:
        _plot_correlogram(plot_acf, clean_changes, next(panels),
                          "Autocorrelation of Percent Changes plot")

    # Partial autocorrelation function plot
    if pacf:
        _plot_correlogram(plot_pacf, x, next(panels),
                          "Partial Autocorrelation Function plot")

    # Partial autocorrelation function plot on changes data
    if pacf_change:
        _plot_correlogram(plot_pacf, clean_changes, next(panels),
                          "Partial Autocorrelation Function plot on changes")

    # Give enough spacing between subplots x-axes and titles of plots
    fig.tight_layout(pad=1.10,  rect=[0, 0.03, 1, 0.95])

    return fig
Exemplo n.º 50
0
# Load the index history with the first column as a parsed date index.
df = pd.read_csv("data/^GSPC.csv", index_col=0, parse_dates=True)
# Bare expression: only displays the first rows in a notebook.
df.head(10)

# checking for Autocorrelation
# Autocorrelation plots
# Pass the single price series that the rest of the analysis uses. Handing the
# whole DataFrame to autocorrelation_plot would pool every column into one
# statistic instead of describing the adjusted close.
autocorrelation_plot(df["Adj Close"])
plt.savefig(PATH + "Autocorrelation plot")
plt.show()

# Autocorrelation Function (ACF) and Partial Autocorrelation Function (PACF) plots
plot_acf(df["Adj Close"], lags=50)  # lag 50 days
plt.savefig(PATH + "Autocorrelation Function plot")
plt.show()

plot_pacf(df["Adj Close"], lags=50)
plt.savefig(PATH + "Partial Autocorrelation Function plot")
plt.show()

# If the time series is stationary, the ACF/PACF plots will show a quick drop-off in correlation after a
# small amount of lag between points.
# This data is non-stationary as a high number of previous observations are correlated with future values.

### Autoregressive model ###

# train / test split
# test - predictions of last 5 years of the Adj. Close price
test_length = 1265
X = df["Adj Close"].values
# NOTE(review): the training slice starts at index 1, so the first observation
# is left out of both sets - confirm this is intended.
train, test = X[1:len(X) - test_length], X[len(X) - test_length:]
Exemplo n.º 51
0
# Render minus signs with the ASCII hyphen instead of the Unicode minus glyph.
plt.rcParams['axes.unicode_minus'] = False
# Time plot of the raw data.
data.plot()
plt.show()

# Autocorrelation of the raw data.
from statsmodels.graphics.tsaplots import plot_acf
plot_acf(data).show()

from statsmodels.tsa.stattools import adfuller as ADF

# Stationarity (ADF) test on the raw series.
# print is called with a single pre-formatted string so the line is valid
# Python 3 and prints the same text under Python 2; the former
# `print 'label', value` statements were Python 2 only.
print('ADF test result: {}'.format(ADF(data['value'])))

# First difference of the data, with the leading NaN row dropped.
D_data = data.diff().dropna()
D_data.columns = ['diff value']
D_data.plot()
plt.show()
# ACF and PACF of the differenced data.
plot_acf(D_data).show()
from statsmodels.graphics.tsaplots import plot_pacf
plot_pacf(D_data).show()
print('diff seq ADF test result: {}'.format(ADF(D_data['diff value'])))

# White-noise (Ljung-Box) test on the differenced data at lag 1.
from statsmodels.stats.diagnostic import acorr_ljungbox
print('dff white noise test result: {}'.format(acorr_ljungbox(D_data, lags=1)))

# NOTE(review): statsmodels.tsa.arima_model was removed in statsmodels 0.13;
# this import only works with an older release - confirm the pinned version.
from statsmodels.tsa.arima_model import ARIMA


# Fit ARIMA with order (1, 1, 1) on the raw data, then request the summary and
# a 30-step forecast. Neither result is stored or printed, so they are only
# visible when run interactively.
model = ARIMA(data, (1,1,1)).fit()
model.summary2()
model.forecast(5*6)