示例#1
0
# -*- coding:utf-8 -*-
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
from datetime import datetime
from test_stationarity import test_stationarity 
# Apply the matplotlib style sheet named 'presentation' to all figures.
# NOTE(review): presumably a user-defined style; it must be installed in the
# local matplotlib style library for this call to succeed.
plt.style.use('presentation')

daily_flow_head = ['date', 'flow']

# Series.from_csv was deprecated in pandas 0.21 and removed in pandas 1.0.
# read_csv with the defaults from_csv used (no header row, first column as a
# date-parsed index), followed by selecting the single value column, yields
# the same Series.
daily_flow_a = pd.read_csv(
    '../data/daily-flow-full-record/flow/daily-flow-A',
    header=None, index_col=0, parse_dates=True).iloc[:, 0]
# from_csv cleared the index and series names for header-less files.
daily_flow_a.index.name = None
daily_flow_a.name = None

# Keep only the observations dated 2014-01-01 or later.
fc_daily_flow_a = daily_flow_a[datetime(2014, 1, 1):]

# Run the stationarity check from the local test_stationarity module.
test_stationarity(fc_daily_flow_a)
#print daily_flow

# Disabled plotting code, kept for reference:
# lineplot_a = plt.plot(fc_daily_flow_a,'b-', linewidth=2.0, alpha=0.8, label="JiHua")
#
# plt.xlabel('Date')
# plt.ylabel('Customer Flow')
# plt.title('Daily Customer Flow')
# plt.legend()
# plt.show()
示例#2
0
#test_sta.test_stationarity(ts_log_moving_avg_diff)
#

## method 3 : remove rolling mean with exp weight -------------------

# pd.ewma was removed in pandas 0.23. The .ewm(...).mean() accessor is its
# replacement and uses the same defaults (adjust=True, min_periods=0).
# NOTE(review): assumes ts_log is a pandas Series/DataFrame -- confirm.
expweighted_avg = ts_log.ewm(halflife=12).mean()

# Subtract the exponentially weighted mean from the series.
ts_log_ewma_diff = ts_log - expweighted_avg

# Overlay the weighted mean (red) on the series for visual inspection.
plt.plot(ts_log)
plt.plot(expweighted_avg, color='red')
plt.show()

# Check stationarity of the detrended series.
test_sta.test_stationarity(ts_log_ewma_diff)


## method 4 : remove trend and seasonality with differencing  -------

#ts_log_diff=ts_log-ts_log.shift()   # first order differencing
#
#plt.plot(ts_log_diff)
#
#plt.show()
#
#ts_log_diff.dropna(inplace=True)
#
#test_sta.test_stationarity(ts_log_diff)
#
#ts_log_diff_diff=ts_log_diff-ts_log_diff.shift()   #second order differencing
示例#3
0
# -*- coding:utf-8 -*-
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
from datetime import datetime
from test_stationarity import test_stationarity
# Apply the matplotlib style sheet named 'presentation' to all figures.
# NOTE(review): presumably a user-defined style; it must be installed in the
# local matplotlib style library for this call to succeed.
plt.style.use('presentation')

daily_flow_head = ['date', 'flow']

# Series.from_csv was deprecated in pandas 0.21 and removed in pandas 1.0.
# read_csv with the defaults from_csv used (no header row, first column as a
# date-parsed index), followed by selecting the single value column, yields
# the same Series.
daily_flow_a = pd.read_csv(
    '../data/daily-flow-full-record/flow/daily-flow-A',
    header=None, index_col=0, parse_dates=True).iloc[:, 0]
# from_csv cleared the index and series names for header-less files.
daily_flow_a.index.name = None
daily_flow_a.name = None

# Keep only the observations dated 2014-01-01 or later.
fc_daily_flow_a = daily_flow_a[datetime(2014, 1, 1):]

# Run the stationarity check from the local test_stationarity module.
test_stationarity(fc_daily_flow_a)
#print daily_flow

# Disabled plotting code, kept for reference:
# lineplot_a = plt.plot(fc_daily_flow_a,'b-', linewidth=2.0, alpha=0.8, label="JiHua")
#
# plt.xlabel('Date')
# plt.ylabel('Customer Flow')
# plt.title('Daily Customer Flow')
# plt.legend()
# plt.show()
示例#4
0
# Residual left after subtracting FC2_all_rloessed from FC2_value.
residual_FC2_all = FC2_value - FC2_all_rloessed

# Wrap the residuals as Series on their respective indexes.
ts_all = pd.Series(data=residual_FC2_all, index=ts_index_FC2)
ts_learn = pd.Series(data=residual_FC2_learn, index=ts_index_FC2_1)

## ---- make the time series stationary ----

# method 1 : remove the trend and seasonality with first-order differencing
ts_learn_diff = ts_learn - ts_learn.shift(1)
plt.plot(ts_learn_diff)
plt.show()

# Shifting leaves a NaN at the start of the series; drop it before testing.
ts_learn_diff = ts_learn_diff.dropna()
test_sta.test_stationarity(ts_learn_diff)

# ---- decide the structure (p, q) of the model ----

import statsmodels.api as sm

tsa_plots = sm.graphics.tsa

# One figure, two stacked panels: ACF on top, PACF below, 40 lags each,
# both computed on the differenced learning series.
fig = plt.figure(figsize=(12, 8))
ax1 = fig.add_subplot(2, 1, 1)
fig = tsa_plots.plot_acf(ts_learn_diff, lags=40, ax=ax1)
ax2 = fig.add_subplot(2, 1, 2)
fig = tsa_plots.plot_pacf(ts_learn_diff, lags=40, ax=ax2)
plt.show()

# ---- decide the parameters of the model ----
# NOTE(review): statsmodels.tsa.arima_model.ARIMA was deprecated in
# statsmodels 0.12 and no longer works in 0.13+. Migrating to
# statsmodels.tsa.arima.model.ARIMA also requires adapting the fit() call,
# which is not visible here -- confirm before changing the import.
from statsmodels.tsa.arima_model import ARIMA

# order is (p, d, q).
model = ARIMA(ts_learn, order=(5, 1, 10))
# Plot the decomposition held in add_decomp (defined outside this excerpt).
fig = add_decomp.plot()
# plt.show()

# The additive decomposition clearly shows an upward trend and a seasonal
# component, so the time series is not stationary.


# test_stationarity(train_1['sales'])

# The train_1 time series does not look stationary, even though the p-value
# of the D-F test is below 5%. Let's try to make the data stationary.

# First-order difference of the sales column; the NaN produced by the shift
# is dropped in the same expression.
Dtrain_1 = train_1.sales.sub(train_1.sales.shift(1)).dropna()

test_stationarity(Dtrain_1, window=12)  # Test the stationarity again

# Now the plots look like those of a stationary time series. The p-value of the
# D-F test is also extremely small, so the time series is now stationary.

# Now we can take a look at the ACF and PACF plots to determine the parameters for our model.

# The original (undifferenced) data: ACF in the top panel, PACF in the
# bottom panel, 30 lags each.
fig = plt.figure(figsize=(12, 8))
ax1 = fig.add_subplot(2, 1, 1)
fig = sm.graphics.tsa.plot_acf(train_1.sales, lags=30, ax=ax1)
ax2 = fig.add_subplot(2, 1, 2)
fig = sm.graphics.tsa.plot_pacf(train_1.sales, lags=30, ax=ax2)
# plt.show()

# The differenced data