# -*- coding:utf-8 -*-
# Load the daily customer-flow record "A", keep the observations from
# 2014-01-01 onwards and run the project's stationarity check on them.
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
from datetime import datetime
from test_stationarity import test_stationarity

plt.style.use('presentation')
daily_flow_head = ['date', 'flow']

# ``pd.Series.from_csv`` was deprecated in pandas 0.21 and removed in 1.0.
# ``read_csv`` with from_csv's defaults (no header row, first column used as
# a parsed date index) followed by selecting the single data column yields
# the same Series on both old and current pandas.
daily_flow_a = pd.read_csv(
    '../data/daily-flow-full-record/flow/daily-flow-A',
    header=None,
    index_col=0,
    parse_dates=True,
).iloc[:, 0]
# from_csv returned an unnamed Series with an unnamed index; keep that.
daily_flow_a.name = None
daily_flow_a.index.name = None

# Restrict the series to 2014-01-01 and later.
fc_daily_flow_a = daily_flow_a[datetime(2014, 1, 1):]
test_stationarity(fc_daily_flow_a)

# Disabled debugging / plotting code, kept for reference:
# print daily_flow
# lineplot_a = plt.plot(fc_daily_flow_a, 'b-', linewidth=2.0, alpha=0.8,
#                       label="JiHua")
# plt.xlabel('Date')
# plt.ylabel('Customer Flow')
# plt.title('Daily Customer Flow')
# plt.legend()
# plt.show()
# test_sta.test_stationarity(ts_log_moving_avg_diff)

# ## method 3 : remove rolling mean with exp weight -------------------
# ``pd.ewma`` was removed in pandas 0.23; ``Series.ewm(...).mean()`` is the
# replacement and uses the same defaults (adjust=True, min_periods=0,
# ignore_na=False), so the smoothed values are unchanged.
expweighted_avg = ts_log.ewm(halflife=12).mean()
# Subtract the exponentially weighted mean to remove the trend.
ts_log_ewma_diff = ts_log - expweighted_avg

# Overlay the weighted average (red) on the series it was computed from.
plt.plot(ts_log)
plt.plot(expweighted_avg, color='red')
plt.show()

test_sta.test_stationarity(ts_log_ewma_diff)

# ## method 4 : remove trend and seasonality with differencing -------
# (disabled)
# ts_log_diff = ts_log - ts_log.shift()  # first order differencing
# plt.plot(ts_log_diff)
# plt.show()
# ts_log_diff.dropna(inplace=True)
# test_sta.test_stationarity(ts_log_diff)
# ts_log_diff_diff = ts_log_diff - ts_log_diff.shift()  # second order differencing
# -*- coding:utf-8 -*-
# Load the daily customer-flow record "A", keep the observations from
# 2014-01-01 onwards and run the project's stationarity check on them.
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
from datetime import datetime
from test_stationarity import test_stationarity

plt.style.use('presentation')
daily_flow_head = ['date', 'flow']

# ``pd.Series.from_csv`` was deprecated in pandas 0.21 and removed in 1.0.
# ``read_csv`` with from_csv's defaults (no header row, first column used as
# a parsed date index) followed by selecting the single data column yields
# the same Series on both old and current pandas.
daily_flow_a = pd.read_csv(
    '../data/daily-flow-full-record/flow/daily-flow-A',
    header=None,
    index_col=0,
    parse_dates=True,
).iloc[:, 0]
# from_csv returned an unnamed Series with an unnamed index; keep that.
daily_flow_a.name = None
daily_flow_a.index.name = None

# Restrict the series to 2014-01-01 and later.
fc_daily_flow_a = daily_flow_a[datetime(2014, 1, 1):]
test_stationarity(fc_daily_flow_a)

# Disabled debugging / plotting code, kept for reference:
# print daily_flow
# lineplot_a = plt.plot(fc_daily_flow_a, 'b-', linewidth=2.0, alpha=0.8,
#                       label="JiHua")
# plt.xlabel('Date')
# plt.ylabel('Customer Flow')
# plt.title('Daily Customer Flow')
# plt.legend()
# plt.show()
# Residuals left after subtracting the loess-smoothed component from FC2.
residual_FC2_all = FC2_value - FC2_all_rloessed

# Wrap the residuals as time-indexed series: the full record and the
# learning subset.
ts_all = pd.Series(data=residual_FC2_all, index=ts_index_FC2)
ts_learn = pd.Series(data=residual_FC2_learn, index=ts_index_FC2_1)

# ---------------------------- make the series stationary ----------------
# Method 1: remove trend and seasonality with first-order differencing.
ts_learn_diff = ts_learn - ts_learn.shift(1)
plt.plot(ts_learn_diff)
plt.show()

# The first differenced value is NaN (nothing precedes it); drop it before
# running the stationarity test.
ts_learn_diff.dropna(inplace=True)
test_sta.test_stationarity(ts_learn_diff)

# ---------------------------- choose the model structure (p, q) ---------
import statsmodels.api as sm

# ACF in the upper panel, PACF in the lower panel, 40 lags each.
fig = plt.figure(figsize=(12, 8))
ax1 = fig.add_subplot(2, 1, 1)
fig = sm.graphics.tsa.plot_acf(ts_learn_diff, ax=ax1, lags=40)
ax2 = fig.add_subplot(2, 1, 2)
fig = sm.graphics.tsa.plot_pacf(ts_learn_diff, ax=ax2, lags=40)
plt.show()

# ---------------------------- set up the model ---------------------------
# NOTE(review): statsmodels.tsa.arima_model is a legacy module that newer
# statsmodels releases no longer ship - confirm the pinned version.
from statsmodels.tsa.arima_model import ARIMA

# d=1 lets ARIMA difference the undifferenced learning series itself.
model = ARIMA(ts_learn, order=(5, 1, 10))
fig = add_decomp.plot()
# plt.show()

# The additive decomposition clearly shows an upward trend and a seasonal
# component, so the time series is not stationary.
# test_stationarity(train_1['sales'])

# The train_1 series does not look stationary even though the p-value of
# the Dickey-Fuller test is below 5%, so try to make it stationary.
Dtrain_1 = train_1.sales - train_1.sales.shift(1)  # first-order difference
Dtrain_1 = Dtrain_1.dropna()  # discard the NaN introduced by the shift
test_stationarity(Dtrain_1, window=12)  # test the stationarity again

# The plots now look like a stationary series, and the p-value of the
# Dickey-Fuller test is extremely small, so the series is treated as
# stationary from here on.

# Next, inspect the ACF and PACF plots to choose the model parameters.
# The original data
fig = plt.figure(figsize=(12, 8))
ax1 = fig.add_subplot(2, 1, 1)
fig = sm.graphics.tsa.plot_acf(train_1.sales, ax=ax1, lags=30)
ax2 = fig.add_subplot(2, 1, 2)
fig = sm.graphics.tsa.plot_pacf(train_1.sales, ax=ax2, lags=30)
# plt.show()

# The differenced data