def seasonality(df, n, freq='monthly'):
    '''
    Return a seasonally adjusted (or trend-only) version of a time series.

    Parameters:
    df - pandas Series (or DataFrame) to adjust. The weighted method reads
         df.index.month / df.index.quarter, so it needs a datetime-like
         index.
    n - Adjustment method:
          1 - trend component of the decomposition returned by sd
          2 - trend component of the Hodrick-Prescott filter
          3 - first difference followed by a seasonal difference
          anything else - each observation divided by its seasonal weight
              (mean of its month/quarter divided by the overall mean)
    freq - 'monthly' or 'quarterly'. 'annual' is additionally accepted by
           method 2 only.

    Returns:
    The adjusted series, indexed like df. Method 3 leaves NaN in the first
    (seasonal period + 1) rows. df itself is never modified.

    Raises:
    ValueError if freq is not supported by the selected method.
    '''
    if n == 2:
        # Smoothing parameter of the HP filter for each sampling frequency.
        hp_lambdas = {'monthly': 14400, 'quarterly': 1600, 'annual': 100}
        if freq not in hp_lambdas:
            raise ValueError(
                "freq must be one of {} for n == 2, got {!r}".format(
                    sorted(hp_lambdas), freq))
        _cycle, trend = sm.tsa.filters.hpfilter(df, hp_lambdas[freq])
        return trend

    # Number of observations in one seasonal cycle.
    periods = {'monthly': 12, 'quarterly': 4}
    if freq not in periods:
        # An unknown frequency would otherwise turn into a period of 0 and
        # silently produce meaningless output.
        raise ValueError(
            "freq must be one of {}, got {!r}".format(sorted(periods), freq))
    period = periods[freq]

    if n == 1:
        # Pass the period that matches freq rather than always using 12.
        # NOTE(review): newer statsmodels releases call this keyword
        # `period` instead of `freq` - confirm against the installed version.
        return sd(df, freq=period).trend

    if n == 3:
        # (x_t - x_{t-1}) - (x_{t-period} - x_{t-period-1}): comparing each
        # change with the change one full season earlier cancels a stable
        # seasonal pattern and a linear trend.
        return df.diff().diff(period)

    # Weighted adjustment: group observations by their position in the
    # seasonal cycle (month 1..12 or quarter 1..4).
    if freq == 'monthly':
        groups = df.index.month
    else:
        groups = df.index.quarter
    overall_mean = np.mean(df)
    adjusted = df.copy()
    for group in range(1, period + 1):
        in_group = groups == group
        if not in_group.any():
            # No observation falls in this month/quarter; nothing to scale.
            continue
        weight = np.mean(df[in_group]) / overall_mean
        adjusted.loc[in_group] = df.loc[in_group] / weight
    return adjusted
def seasonality_check(df, freq='monthly'):
    '''
    Print and plot several seasonal-adjustment views of a time series so
    they can be compared by eye.

    Parameters:
    df - pandas Series to inspect. The weighted section reads
         df.index.month / df.index.quarter, so a datetime-like index is
         needed.
    freq - 'monthly' or 'quarterly'.

    Output (nothing is returned, df is not modified):
    - result of adf(df), ACF and PACF plots, and the original series
    - trend / seasonal / residual components of the decomposition from sd
    - cycle and trend of the Hodrick-Prescott filter
    - the first-and-seasonally differenced series
    - the seasonal weights (printed) and the weight-adjusted series

    Raises:
    ValueError if freq is neither 'monthly' nor 'quarterly'.
    '''
    # Observations per seasonal cycle and the matching HP-filter smoothing
    # parameter for each supported frequency.
    periods = {'monthly': 12, 'quarterly': 4}
    hp_lambdas = {'monthly': 14400, 'quarterly': 1600}
    if freq not in periods:
        # An unknown frequency would otherwise become a period of 0 and
        # feed meaningless values into every step below.
        raise ValueError(
            "freq must be one of {}, got {!r}".format(sorted(periods), freq))
    period = periods[freq]

    print('ARIMA decomposition')
    # NOTE(review): newer statsmodels releases call this keyword `period`
    # instead of `freq` - confirm against the installed version.
    decomposition = sd(df, freq=period)
    print(adf(df))
    sm.graphics.tsa.plot_acf(df)
    plt.show()
    sm.graphics.tsa.plot_pacf(df)
    plt.show()
    df.plot()
    plt.title('original')
    plt.show()
    components = (
        (decomposition.trend, 'trend'),
        (decomposition.seasonal, 'seasonality'),
        (decomposition.resid, 'residual'),
    )
    for component, heading in components:
        component.plot(c=pick_a_color())
        plt.title(heading)
        plt.show()

    print('HP filter')
    cycle, trend = sm.tsa.filters.hpfilter(df, hp_lambdas[freq])
    cycle.plot(c=pick_a_color())
    plt.title('cycle')
    plt.show()
    trend.plot(c=pick_a_color())
    plt.title('trend')
    plt.show()

    print('differential')
    # (x_t - x_{t-1}) - (x_{t-period} - x_{t-period-1}): each change is
    # compared with the change one full season earlier.
    differenced = df.diff().diff(period)
    differenced.plot(c=pick_a_color())
    plt.show()

    print('weighted')
    # Group by position in the seasonal cycle: month 1..12 or quarter 1..4.
    if freq == 'monthly':
        groups = df.index.month
    else:
        groups = df.index.quarter
    overall_mean = np.mean(df)
    # Work on a real copy so the caller's series is left untouched.
    adjusted = df.copy()
    for group in range(1, period + 1):
        in_group = groups == group
        weight = np.mean(df[in_group]) / overall_mean
        print(weight)
        if in_group.any():
            adjusted.loc[in_group] = df.loc[in_group] / weight
    adjusted.plot(c=pick_a_color())
    plt.show()
def run_diagnostics(ts, title, label):
    '''
    Plot and print stationarity diagnostics for a time series, for the
    series differenced once, and for the series differenced twice.

    Parameters:
    ts - Time series to be analyzed.
    title - Title for plots.
    label - Label for y-axis on plots.

    Output (the function itself returns None). For each of the three
    series, in this order:
    Plot of the series, titled with title (prefixed by 'Differenced' or
        'Twice Differenced' for the derived series)
    Plot of the decomposition of the series
    Plots of autocorrelation and partial autocorrelation functions
    P value output of adfuller test on the series
    '''
    # Differencing drops the leading NaN each time, so every later stage
    # is one observation shorter than the one before it.
    once_differenced = ts.diff().dropna()
    twice_differenced = once_differenced.diff().dropna()

    # (series, plot-title template, text printed before the ADF p value)
    stages = (
        (ts, '{}', 'Adfuller results - p value:'),
        (once_differenced, 'Differenced {}',
         'Differenced adfuller results - p value:'),
        (twice_differenced, 'Twice Differenced {}',
         'Twice-differenced adfuller results - p value:'),
    )

    for series, title_template, adf_message in stages:
        # Plot the series itself with the title and axis labels
        series.plot()
        plt.title(title_template.format(title))
        plt.xlabel('Date')
        plt.ylabel('{}'.format(label))
        plt.show()

        # Plot the decomposition of the series
        sd(series).plot()
        plt.show()

        # Plot acf and pacf
        plot_acf(series)
        plt.show()
        plot_pacf(series)
        plt.show()

        # Element 1 of the adfuller result is the p value
        print(adf_message, adfuller(series)[1])