def decompose(df, col, freq):
    """Plot the observed, trend, seasonal and residual decomposition graphs."""
    decomposed = seasonal_decompose(df[col].values, freq=freq)
    pd.DataFrame(decomposed.observed).plot(figsize=(12, 4), title="Observed")
    pd.DataFrame(decomposed.trend).plot(figsize=(12, 4), title="Trend")
    pd.DataFrame(decomposed.seasonal).plot(figsize=(12, 4), title="Seasonal")
    pd.DataFrame(decomposed.resid).plot(figsize=(12, 4), title="Residuals")
def test_pandas(self):
    res_add = seasonal_decompose(self.data, freq=4)
    freq_override_data = self.data.copy()
    freq_override_data.index = DatetimeIndex(start='1/1/1951',
                                             periods=len(freq_override_data),
                                             freq='A')
    res_add_override = seasonal_decompose(freq_override_data, freq=4)

    seasonal = [62.46, 86.17, -88.38, -60.25, 62.46, 86.17, -88.38, -60.25,
                62.46, 86.17, -88.38, -60.25, 62.46, 86.17, -88.38, -60.25,
                62.46, 86.17, -88.38, -60.25, 62.46, 86.17, -88.38, -60.25,
                62.46, 86.17, -88.38, -60.25, 62.46, 86.17, -88.38, -60.25]
    trend = [np.nan, np.nan, 159.12, 204.00, 221.25, 245.12, 319.75, 451.50,
             561.12, 619.25, 615.62, 548.00, 462.12, 381.12, 316.62, 264.00,
             228.38, 210.75, 188.38, 199.00, 207.12, 191.00, 166.88, 72.00,
             -9.25, -33.12, -36.75, 36.25, 103.00, 131.62, np.nan, np.nan]
    random = [np.nan, np.nan, 78.254, 70.254, -36.710, -94.299, -6.371,
              -62.246, 105.415, 103.576, 2.754, 1.254, 15.415, -10.299,
              -33.246, -27.746, 46.165, -57.924, 28.004, -36.746, -37.585,
              151.826, -75.496, 86.254, -10.210, -194.049, 48.129, 11.004,
              -40.460, 143.201, np.nan, np.nan]
    assert_almost_equal(res_add.seasonal.values.squeeze(), seasonal, 2)
    assert_almost_equal(res_add.trend.values.squeeze(), trend, 2)
    assert_almost_equal(res_add.resid.values.squeeze(), random, 3)
    assert_almost_equal(res_add_override.seasonal.values.squeeze(), seasonal, 2)
    assert_almost_equal(res_add_override.trend.values.squeeze(), trend, 2)
    assert_almost_equal(res_add_override.resid.values.squeeze(), random, 3)
    assert_equal(res_add.seasonal.index.values.squeeze(),
                 self.data.index.values)

    res_mult = seasonal_decompose(np.abs(self.data), 'm', freq=4)
    res_mult_override = seasonal_decompose(np.abs(freq_override_data), 'm',
                                           freq=4)
    seasonal = [1.0815, 1.5538, 0.6716, 0.6931, 1.0815, 1.5538, 0.6716,
                0.6931, 1.0815, 1.5538, 0.6716, 0.6931, 1.0815, 1.5538,
                0.6716, 0.6931, 1.0815, 1.5538, 0.6716, 0.6931, 1.0815,
                1.5538, 0.6716, 0.6931, 1.0815, 1.5538, 0.6716, 0.6931,
                1.0815, 1.5538, 0.6716, 0.6931]
    trend = [np.nan, np.nan, 171.62, 204.00, 221.25, 245.12, 319.75, 451.50,
             561.12, 619.25, 615.62, 548.00, 462.12, 381.12, 316.62, 264.00,
             228.38, 210.75, 188.38, 199.00, 207.12, 191.00, 166.88, 107.25,
             80.50, 79.12, 78.75, 116.50, 140.00, 157.38, np.nan, np.nan]
    random = [np.nan, np.nan, 1.29263, 1.51360, 1.03223, 0.62226, 1.04771,
              1.05139, 1.20124, 0.84080, 1.28182, 1.28752, 1.08043, 0.77172,
              0.91697, 0.96191, 1.36441, 0.72986, 1.01171, 0.73956, 1.03566,
              1.44556, 0.02677, 1.31843, 0.49390, 1.14688, 1.45582, 0.16101,
              0.82555, 1.47633, np.nan, np.nan]
    assert_almost_equal(res_mult.seasonal.values.squeeze(), seasonal, 4)
    assert_almost_equal(res_mult.trend.values.squeeze(), trend, 2)
    assert_almost_equal(res_mult.resid.values.squeeze(), random, 4)
    assert_almost_equal(res_mult_override.seasonal.values.squeeze(), seasonal, 4)
    assert_almost_equal(res_mult_override.trend.values.squeeze(), trend, 2)
    assert_almost_equal(res_mult_override.resid.values.squeeze(), random, 4)
    assert_equal(res_mult.seasonal.index.values.squeeze(),
                 self.data.index.values)
def test_pandas_nofreq(self):
    # issue #3503
    nobs = 100
    dta = pd.Series([x % 3 for x in range(nobs)] + np.random.randn(nobs))
    res_np = seasonal_decompose(dta.values, freq=3)
    res = seasonal_decompose(dta, freq=3)

    atol = 1e-8
    rtol = 1e-10
    assert_allclose(res.seasonal.values.squeeze(), res_np.seasonal,
                    atol=atol, rtol=rtol)
    assert_allclose(res.trend.values.squeeze(), res_np.trend,
                    atol=atol, rtol=rtol)
    assert_allclose(res.resid.values.squeeze(), res_np.resid,
                    atol=atol, rtol=rtol)
def _create_grid_plot_of_trends(df, X, col_list, filename):
    width = 600
    height = 400
    color_palette = ['Black', 'Red', 'Purple', 'Green', 'Brown',
                     'Yellow', 'Cyan', 'Blue', 'Orange', 'Pink']
    i = 0
    # 2 columns, so the number of rows is total / 2
    row_index = 0
    row_list = []
    row = []
    for col in col_list[1:]:  # skip the date column
        # create a new plot
        s1 = figure(x_axis_type='datetime', width=width, plot_height=height,
                    title=col + ' trend')
        # seasonal decompose to extract the seasonal trend
        decomposition = seasonal_decompose(np.array(df[col]),
                                           model='additive', freq=15)
        s1.line(X, decomposition.trend,
                color=color_palette[i % len(color_palette)],
                alpha=0.5, line_width=2)
        row.append(s1)
        if len(row) == 2:
            row_copy = copy.deepcopy(row)
            row_list.append(row_copy)
            row = []
            i = 0
        i += 1
    # put all the plots in a grid layout
    p = gridplot(row_list)
    save(vplot(p), filename=filename, title='trends')
def make_stationary(self):
    # Remove trend and seasonality.
    # For a positive trend, penalize higher values with a log / square root /
    # cube root transform, etc.
    self.ts_log = np.log(self.df)

    # Estimate or model the trend, then remove it from the series.
    # Different approaches:
    #   aggregation: take the monthly/weekly average
    #   smoothing:   take a rolling average
    #   poly fit:    fit a regression model

    # Example 1: smoothing with a rolling average
    # (rolling mean of the log series, using the modern pandas API)
    moving_avg = self.ts_log.rolling(window=287).mean()
    ts_log_moving_avg_diff = self.ts_log - moving_avg
    ts_log_moving_avg_diff.dropna(inplace=True)

    # Example 2: exponentially weighted moving average (EWMA);
    # halflife plays the role of the window: how many data points make up one cycle
    expwighted_avg = self.ts_log.ewm(halflife=287).mean()
    ts_log_ewma_diff = self.ts_log - expwighted_avg

    # Example 3: differencing: take the difference between the observation at
    # a particular instant and the one at the previous instant
    self.ts_log_diff = self.ts_log - self.ts_log.shift()

    # Example 4: decomposing: trend and seasonality are modeled separately and
    # the remaining part of the series is returned.
    # A pandas DataFrame with an index doesn't work here, so pass the numpy
    # values via DataFrame.values.
    decomposition = seasonal_decompose(self.ts_log.values, freq=288)
    trend = decomposition.trend
    seasonal = decomposition.seasonal
    residual = decomposition.resid
def seasonal_decompose(timeSeries, freq=34):
    # Seasonal decomposition using moving averages.
    # Note: this wrapper shadows statsmodels' seasonal_decompose, so the
    # library function is referenced through the tsa_seasonal module alias.
    decomposition = tsa_seasonal.seasonal_decompose(timeSeries, freq=freq)
    trend = decomposition.trend
    seasonal = decomposition.seasonal
    residual = decomposition.resid
    return [trend, seasonal, residual]
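# A minimal usage sketch for the wrapper above, for illustration only. It
# assumes the statsmodels module was imported as `tsa_seasonal`, e.g.
# `from statsmodels.tsa import seasonal as tsa_seasonal`, and uses a synthetic
# series with a 34-step cycle just to exercise the call.
import numpy as np
import pandas as pd

n = 170  # five full 34-step cycles
ts = pd.Series(np.sin(np.arange(n) * 2 * np.pi / 34) + 0.01 * np.arange(n))
trend, seasonal, residual = seasonal_decompose(ts, freq=34)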
def decomp(ts):
    decomposition = seasonal_decompose(ts[Y_name])
    fig = decomposition.plot()
    plt.tight_layout()
    fig.savefig('decomp.png', bbox_inches="tight")
    trend = decomposition.trend
    seasonal = decomposition.seasonal
    resid = decomposition.resid
def test_interpolate_trend(self):
    x = np.arange(6)
    trend = seasonal_decompose(x, freq=2).trend
    assert_equal(trend[0], np.nan)

    trend = seasonal_decompose(x, freq=2, extrapolate_trend=1).trend
    assert_almost_equal(trend, x)

    trend = seasonal_decompose(x, freq=2, extrapolate_trend='freq').trend
    assert_almost_equal(trend, x)

    # 2d case
    x = np.tile(np.arange(6), (2, 1)).T
    trend = seasonal_decompose(x, freq=2, extrapolate_trend=1).trend
    assert_almost_equal(trend, x)

    trend = seasonal_decompose(x, freq=2, extrapolate_trend='freq').trend
    assert_almost_equal(trend, x)
def decompose_pre(ts):
    ts_log = np.log(ts)
    decomposition = seasonal_decompose(ts_log.values, freq=24)
    # decomposition.plot()
    # plt.show(block=False)
    ts_log_decompose = ts_log
    ts_log_decompose.plays = decomposition.resid
    # print(ts_log_decompose)
    ts_log_decompose.dropna(inplace=True)
    stationarity_test(ts_log_decompose)
    return ts_log_decompose
def freq(df, col, max1):
    """Find the required freq for the decomposition."""
    count = None
    for i in range(1, max1):
        try:
            decomposed = seasonal_decompose(df[col].values, freq=i)
            # drop the NaN values the moving-average trend leaves at the edges
            decomposed.resid = decomposed.resid[~np.isnan(decomposed.resid)]
            print(decomposed.resid)
            # decomposed.resid = [1, 2, 1, 2, 1, 2]
            x = np.array(decomposed.resid)
            # Kolmogorov-Smirnov test: are the residuals close to normal?
            z, p = stats.kstest(x, 'norm')
            if p < 0.055:
                print('It is not the required freq')
            else:
                print('It is the required freq')
                count = i
        except ValueError:
            pass
    decompose(df, col, i)
    return count
def test_filt(self):
    filt = np.array([1/8., 1/4., 1/4., 1/4., 1/8.])
    res_add = seasonal_decompose(self.data.values, filt=filt, freq=4)
    seasonal = [62.46, 86.17, -88.38, -60.25, 62.46, 86.17, -88.38, -60.25,
                62.46, 86.17, -88.38, -60.25, 62.46, 86.17, -88.38, -60.25,
                62.46, 86.17, -88.38, -60.25, 62.46, 86.17, -88.38, -60.25,
                62.46, 86.17, -88.38, -60.25, 62.46, 86.17, -88.38, -60.25]
    trend = [np.nan, np.nan, 159.12, 204.00, 221.25, 245.12, 319.75, 451.50,
             561.12, 619.25, 615.62, 548.00, 462.12, 381.12, 316.62, 264.00,
             228.38, 210.75, 188.38, 199.00, 207.12, 191.00, 166.88, 72.00,
             -9.25, -33.12, -36.75, 36.25, 103.00, 131.62, np.nan, np.nan]
    random = [np.nan, np.nan, 78.254, 70.254, -36.710, -94.299, -6.371,
              -62.246, 105.415, 103.576, 2.754, 1.254, 15.415, -10.299,
              -33.246, -27.746, 46.165, -57.924, 28.004, -36.746, -37.585,
              151.826, -75.496, 86.254, -10.210, -194.049, 48.129, 11.004,
              -40.460, 143.201, np.nan, np.nan]
    assert_almost_equal(res_add.seasonal, seasonal, 2)
    assert_almost_equal(res_add.trend, trend, 2)
    assert_almost_equal(res_add.resid, random, 3)
XGB_result.index = XGB_result.index.astype('string')
XGB_result.drop(['Theoretical'], axis=1, inplace=True)
XGB_result = XGB_result.T
XGB_result.to_json('Muppandal_Predictions.json', orient='records')

end_time = datetime.now()
print("Time required to run a single script:", end_time - start_time)

"""# Dilated CNN"""

from pmdarima.arima import auto_arima
from statsmodels.tsa.seasonal import seasonal_decompose

plt.figure(figsize=(14, 6))
temp_t = df['Energy'][7200:7600]
decompose = seasonal_decompose(temp_t, model='add')
decompose.plot();

from statsmodels.tsa.holtwinters import ExponentialSmoothing, SimpleExpSmoothing

alpha = 0.1
EXP_df = df['Energy']
train_bound = EXP_df.shape[0] - 48 * 1
EXP_df_train = EXP_df[:train_bound]
EXP_df_test = EXP_df[-48:]
forecast_length = 48
EXP_results = pd.DataFrame()
EXP_results['Theoretical'] = EXP_df_test
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from keras.models import Sequential
from keras.layers import Dense
from statsmodels.tsa.seasonal import seasonal_decompose
from sklearn.preprocessing import MinMaxScaler
from keras.preprocessing.sequence import TimeseriesGenerator

df = pd.read_csv('./statmodel/Data/Alcohol_Sales.csv', index_col='DATE',
                 parse_dates=True)
df.index.freq = 'MS'
df.columns = ['Sales']
df.plot()
plt.show()

result = seasonal_decompose(df['Sales'])
result.plot()
plt.show()

train = df.iloc[:313]
test = df.iloc[313:]
print(len(test))

scaler = MinMaxScaler()
scaler.fit(train)  # fits the min/max values on the training data set
scaled_train = scaler.transform(train)
scaled_test = scaler.transform(test)

n_input = 2
n_features = 1
generator = TimeseriesGenerator(scaled_train, scaled_train,
# Weighted MA, for when we don't know the period; adjust parameters as necessary
# http://pandas.pydata.org/pandas-docs/stable/computation.html#exponentially-weighted-moment-functions
expweighted_MA = data_log.ewm(min_periods=0, adjust=True, ignore_na=False,
                              halflife=12).mean()
data_log_ewma = data_log - expweighted_MA
test_stationarity(data_log_ewma)

# Differencing, first order: subtract t+1 from t
data_log_diff = data_log - data_log.shift()
plt.plot(data_log_diff)

# Decomposition: could be very useful, but we need to understand better how to
# add the components back into the forecast (see the sketch after this block)
decomposition = seasonal_decompose(data_log)
trend = decomposition.trend
seasonal = decomposition.seasonal
residual = decomposition.resid  # what is left after removing trend and seasonal

plt.subplot(411)
plt.plot(data_log, label='Original')
plt.legend(loc='best')
plt.subplot(412)
plt.plot(trend, label='Trend')
plt.legend(loc='best')
plt.subplot(413)
plt.plot(seasonal, label='Seasonality')
plt.legend(loc='best')
plt.subplot(414)
plt.plot(residual, label='Residuals')
plt.legend(loc='best')
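# A minimal sketch of adding the seasonal component back into a forecast,
# addressing the open question in the comment above. Everything here is an
# assumption for illustration (not the original author's method): `data_log`
# is taken to be a monthly pandas Series with a month-start DatetimeIndex and
# a 12-month seasonal period, and a naive constant level stands in for a real
# trend/residual model.
import pandas as pd

deseason = data_log - decomposition.seasonal            # trend + residual only
level = deseason.dropna().iloc[-12:].mean()             # naive level forecast
future_idx = pd.date_range(data_log.index[-1], periods=13, freq='MS')[1:]
cycle = decomposition.seasonal.iloc[-12:].values        # last full cycle lines up with the next 12 months
forecast = pd.Series(level + cycle, index=future_idx)   # add the seasonality back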
def make_stationary(time_series_data):
    """
    One of the first tricks to reduce trend is transformation. For example, in
    this case we can clearly see that there is a significant positive trend,
    so we can apply a transformation which penalizes higher values more than
    smaller ones: taking a log, square root, cube root, etc.
    :param time_series_data:
    :return:
    """
    ts_log = np.log(time_series_data)
    plt.plot(ts_log)
    plt.show()

    ## The visible upward trend now needs to be removed from the data (right
    ## now both the trend and the noise are present).
    ## Ways to estimate the trend from the data:
    ## Aggregation - taking the average for a time period, like monthly/weekly averages
    ## Smoothing - taking rolling averages
    ## Polynomial Fitting - fitting a regression model

    ## Smoothing
    # We take the average of k consecutive values depending on the frequency
    # of the time series. Here we can take the average over the past year,
    # i.e. the last 12 values.
    moving_avg = ts_log.rolling(12).mean()
    plt.plot(moving_avg, color="green")
    plt.plot(ts_log, color="red")
    plt.show()

    # Since we are averaging the last 12 values, the rolling mean is not
    # defined for the first 11 values. This can be observed as:
    ts_log_moving_avg_diff = ts_log - moving_avg
    print(ts_log_moving_avg_diff.head(14))

    # Let us drop the first 11 NaN values
    avg_diff = ts_log_moving_avg_diff.dropna()
    stationary(avg_diff)
    result = dickey_fuller(avg_diff["#Passengers"])
    print(result)

    ## Now we can see that the rolling values vary slightly but show no
    ## specific trend. Also, the test statistic is smaller than the 5%
    ## critical value, so we can say with 95% confidence that this is a
    ## stationary series.

    # However, a drawback of this particular approach is that the time period
    # has to be strictly defined. Here we can take yearly averages, but in
    # complex situations like forecasting a stock price it's difficult to come
    # up with a number. So we take a 'weighted moving average', where more
    # recent values are given a higher weight. There are many techniques for
    # assigning weights; a popular one is the exponentially weighted moving
    # average, where weights are assigned to all previous values with a decay
    # factor.
    weight_ma = ts_log.ewm(halflife=12).mean()
    ts_log_weight_diff = ts_log - weight_ma
    plt.plot(weight_ma, color="green")
    plt.plot(ts_log, color="red")
    plt.show()
    stationary(ts_log_weight_diff)

    # This TS has even smaller variations in the magnitude of the mean and
    # standard deviation. Also, the test statistic is smaller than the 1%
    # critical value, which is better than the previous case. Note that in
    # this case there will be no missing values, as all values from the start
    # are given weights, so it works even with no previous values.

    ## Removing trend and seasonality from highly seasonal data
    # Differencing - taking the difference with a particular time lag
    # Decomposition - modeling both trend and seasonality and removing them
    # from the series

    # 1. Differencing
    ts_log_diff = ts_log - ts_log.shift()
    plt.plot(ts_log_diff)
    plt.show()
    # The trend seems to have been reduced significantly.
    # print(ts_log_diff)  # the first value is unknown because it is estimated by shifting
    ts_log_diff.dropna(inplace=True)
    stationary(ts_log_diff)
    # The Dickey-Fuller test statistic is less than the 10% critical value,
    # thus the TS is stationary with 90% confidence. We can also take second
    # or third order differences, which might give even better results in
    # certain applications.
    ts_log_diff2 = ts_log - ts_log.shift(periods=2)
    plt.plot(ts_log_diff2)
    plt.show()
    # The trend seems to have been reduced significantly.
    # print(ts_log_diff2)  # the first values are unknown because they are estimated by shifting
    ts_log_diff2.dropna(inplace=True)
    stationary(ts_log_diff2)

    ## 2. Decomposing
    # Both trend and seasonality are modeled separately and the remaining part
    # of the series is returned.
    decomp = seasonal_decompose(ts_log)
    trend = decomp.trend
    season = decomp.seasonal
    residual = decomp.resid

    plt.subplot(411)
    plt.plot(ts_log, label='Original')
    plt.legend(loc='best')
    plt.subplot(412)
    plt.plot(trend, label='Trend')
    plt.legend(loc='best')
    plt.subplot(413)
    plt.plot(season, label='Seasonality')
    plt.legend(loc='best')
    plt.subplot(414)
    plt.plot(residual, label='Residuals')
    plt.legend(loc='best')
    plt.tight_layout()
    plt.show()

    # Let's check the stationarity of the residuals:
    ts_log_decompose = residual
    ts_log_decompose.dropna(inplace=True)
    stationary(ts_log_decompose)
    # The Dickey-Fuller test statistic is significantly lower than the 1%
    # critical value, so this TS is close to stationary.
    return ts_log
### DELETE OUTLIERS
thre = 1.3
delete = np.where(resid9 < np.mean(resid9) - thre * np.std(resid9))[0]
train0 = np.delete(np.array(dataframe.iloc[:, 0]), delete)
train = np.sqrt(train0)
plt.hist(train)

rollmean = pd.Series(train).rolling(window=20).mean()
rollstd = pd.Series(train).rolling(window=20).std()

ts_log0 = np.log(train)
ts_log = pd.DataFrame(ts_log0).dropna()

decomposition = seasonal_decompose(np.array(ts_log).reshape(len(ts_log),),
                                   freq=100)
trend = decomposition.trend
seasonal = decomposition.seasonal
residual = decomposition.resid

# estimate the seasonal period from the spacing between seasonal minima
z = np.where(seasonal == min(seasonal))[0]
period = z[2] - z[1]
look_back = period

plt.figure(figsize=(8, 8))
plt.subplot(411)
plt.plot(ts_log, label='Original')
plt.legend(loc='upper left')
plt.subplot(412)
def test_2d(self):
    x = np.tile(np.arange(6), (2, 1)).T
    trend = seasonal_decompose(x, freq=2).trend
    expected = np.tile(np.arange(6, dtype=float), (2, 1)).T
    expected[0] = expected[-1] = np.nan
    assert_equal(trend, expected)
mse = mean_squared_error(valid, model_predictions)
print('MSE: %f' % mse)
print("MAE = %.4f" % (sum(abs(errore)) / len(errore)))

# %%
# Let's try using ETS, applying it to the trend and seasonal components.
# For the residuals, being a white-noise series (with no components),
# ARIMA is used, because with ETS we could at most use Simple Exponential
# Smoothing, which fails to generate satisfactory forecasts.
#
# NOTE: adding back the residuals matters little here.

# Decompose the time series.
# two_sided=False means the moving average (the process described in the
# notebook) is computed from past values only, instead of being centered
# as usual.
decomposition = seasonal_decompose(train, period=year, two_sided=False)

# Extract the components
trend = decomposition.trend
seasonal = decomposition.seasonal
residual = decomposition.resid

# Drop any NaN values from the series
trend.dropna(inplace=True)
seasonal.dropna(inplace=True)
residual.dropna(inplace=True)

# Build models for the trend and seasonal components.
# USE ARIMA FOR THE RESIDUALS SINCE THEY ARE A STATIONARY COMPONENT
trend_model = ExponentialSmoothing(trend,
    adft = adfuller(timeseries, autolag='AIC')
    # The raw adfuller output is unlabeled, so we attach names to the values
    # manually using a Series and a loop.
    output = pd.Series(adft[0:4],
                       index=['Test Statistics', 'p-value', 'No. of lags used',
                              'Number of observations used'])
    for key, values in adft[4].items():
        output['critical value (%s)' % key] = values
    print(output)

test_stationarity(df_close)

result = seasonal_decompose(df_close, model='multiplicative', freq=30)
fig = plt.figure()
fig = result.plot()
fig.set_size_inches(16, 9)

from pylab import rcParams
rcParams['figure.figsize'] = 10, 6

df_log = np.log(df_close)
moving_avg = df_log.rolling(12).mean()
std_dev = df_log.rolling(12).std()
plt.legend(loc='best')
plt.title('Moving Average')
plt.plot(std_dev, color="black", label="Standard Deviation")
plt.plot(moving_avg, color="red", label="Mean")
plt.legend()
plt.show()
import os
os.chdir(r"C:\Users\Lenovo\Desktop\umeed\csv files")

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from statsmodels.tsa.seasonal import seasonal_decompose

# Read the dataset
airline = pd.read_csv('dairy.csv', index_col='Month', parse_dates=True)

# Print the first five rows of the dataset
airline.head()

# ETS Decomposition
result = seasonal_decompose(airline['#Passengers'],
                            model='multiplicative', period=30)

# Import the library
from pmdarima import auto_arima

# Ignore harmless warnings
import warnings
warnings.filterwarnings("ignore")

# Fit the auto_arima function to the dataset
stepwise_fit = auto_arima(airline['#Passengers'],
                          start_p=1, start_q=1,
                          max_p=3, max_q=3,
from statsmodels.tsa.seasonal import seasonal_decompose

output = open('AIRMA_mars_tianchi_artist_plays_predict.csv', 'w')
for artist in artists:
    print(artist, len(daily_play[artist]))
    y_data = daily_play[artist][-30:]
    l = len(y_data)
    dates_str = sm.tsa.datetools.date_range_str('2005m1', length=l)
    dates_all = sm.tsa.datetools.dates_from_range('2005m1', length=l)
    y = pd.Series(y_data, index=dates_all)
    plt.plot(y)

    decomposition = seasonal_decompose(y)
    trend = decomposition.trend
    seasonal = decomposition.seasonal
    residual = decomposition.resid
    y_decompose = residual
    y_decompose.dropna(inplace=True)
    test_stat(y_decompose)

    # remove moving avg
    moving_avg = y.rolling(window=12, center=False).mean()
    y_moving_avg_diff = y - moving_avg
    y_moving_avg_diff.dropna(inplace=True)
    print("Stationarity for TS - moving avg:")
filename = 'ERA5_Arctic_clouds_1979_2019.csv'
df0 = pd.read_csv(filename, sep=',')
var0_raw = df0['Total cloud cover (%)']
var1_raw = df0['Total column cloud water (g m**-2)']
var0 = var0_raw[0:nmons]
var1 = var1_raw[0:nmons]
y0_raw = df0['Year']
m0_raw = df0['Month']
y0 = y0_raw[0:nmons]
m0 = m0_raw[0:nmons]

###### Decompose time series data
# Time Series Decomposition
result_mul0 = seasonal_decompose(var0.values, model='additive', freq=12,
                                 two_sided=False)
# Note: with an additive model the seasonal component is removed by
# subtraction; dividing would only be correct for a multiplicative model.
deseason0 = var0.values - result_mul0.seasonal
detrend0 = var0.values - result_mul0.trend
re0 = result_mul0.resid

result_mul1 = seasonal_decompose(var1.values, model='additive', freq=12,
                                 two_sided=False)
deseason1 = var1.values - result_mul1.seasonal
detrend1 = var1.values - result_mul1.trend
re1 = result_mul1.resid

###### Save data into csv file
dict = {
# visualise rolling statistics and standard deviation
da.plot_rollingStatistics(usd)
da.plot_rollingStatistics(brent)
da.plot_rollingStatistics(dax)
da.plot_rollingStatistics(nasdaq)
da.plot_rollingStatistics(nasdaq100)
da.plot_rollingStatistics(wti)
da.plot_rollingStatistics(xau)

import preprocessing as pp
usd_log = pp.log_transform(usd)

from statsmodels.tsa.seasonal import seasonal_decompose
decomposition = seasonal_decompose(usd_log)

trend = decomposition.trend
seasonal = decomposition.seasonal
residual = decomposition.resid

plt.subplot(411)
plt.plot(usd_log, label='Original')
plt.legend(loc='best')
plt.subplot(412)
plt.plot(trend, label='Trend')
plt.legend(loc='best')
plt.subplot(413)
plt.plot(seasonal, label='Seasonality')
plt.legend(loc='best')
plt.subplot(414)
# Code example of decomposition:
# prints the four graphs for ONE defined article

from statsmodels.tsa.seasonal import seasonal_decompose
from matplotlib import pyplot

# define the id to calculate (single examination)
art_id = [722]

# load and group data monthly
grouped = group_by_frequence(get_dataframe(art_id))

# prepare data for training
series = grouped['Menge']
result = seasonal_decompose(series, model='additive')

# printing out the values for each series
#print(result.trend)
#print(result.seasonal)
#print(result.resid)
#print(result.observed)

# printing all as charts
result.plot()
pyplot.show()

# In[44]:

# import the data
#%%
view_hour.reset_index(inplace=True)
view_hour = view_hour.set_index('datetime')
view_hour.sort_index(inplace=True)
view_hour.info()

#%%
## Write out the csv file to the local directory
#view_hour.to_csv('/Users/swe03/view_hour.txt', index=True)

#%%
view_hour.describe()

#%%
decomposition = seasonal_decompose(view_hour['distinct_freq_sum'].values, freq=24)
fig = decomposition.plot()
fig.set_size_inches(50, 8)

#%%
# Graph Autocorrelation and Partial Autocorrelation data
fig, axes = pplt.subplots(1, 2, figsize=(15, 4))
fig = sm.graphics.tsa.plot_acf(view_hour['distinct_freq_sum'], lags=24, ax=axes[0])
fig = sm.graphics.tsa.plot_pacf(view_hour['distinct_freq_sum'], lags=24, ax=axes[1])

#%%
## Specify the SARIMAX model
## Default for the CI is 95%. Set in the Alpha parameter for the conf_int function
residual = np.zeros((datalimit2, np.shape(data)[3]))

# Plot params
plt.rcParams['figure.figsize'] = (10, 5)

# Initialize x and y
x_data = np.arange(0, datalimit2) / calendarYear
y_data_all = data[datalimit1:datalimit2, 0, 0, :]

# Choose scenarios
scenarios = [0, 1, 2, 4, 5, 6, 7, 9]  # range(np.shape(data)[3])
for i in scenarios:
    y_data = data[datalimit1:datalimit2, 0, 0, i]

    #==========================================================================
    # STEP 0 - Identify trend, seasonality and residual
    result = seasonal_decompose(y_data, freq=365, model='additive')

    # Fit linear trend: get the parameters alpha and beta.
    # Fit the curve with lmfit.
    line_mod = LinearModel(prefix='line_')
    pars_line = line_mod.guess(y_data, x=x_data)
    result_line_model = line_mod.fit(y_data, pars_line, x=x_data)
    print(result_line_model.fit_report())
    line_intercept[:, i] = result_line_model.params['line_intercept']._val
    line_slope[:, i] = result_line_model.params['line_slope']._val
    trend[:, i] = result_line_model.best_fit
    #==========================================================================
def model_feature(file_name, df, feature):
    # first create a directory named after the feature to store the results
    file_name_wo_extn = file_name[:-4]
    dir_name = os.path.join(os.path.sep, os.getcwd(), OUTPUT_DIR_NAME,
                            file_name_wo_extn, feature)
    if os.path.exists(dir_name):
        logger.info('dir name is ==> ' + dir_name)
        # delete the existing directory, if any
        shutil.rmtree(dir_name)
    os.makedirs(dir_name)

    # temporarily change to the new feature directory
    curr_dir = os.getcwd()
    os.chdir(dir_name)

    # create a string buffer to store all information about this feature,
    # which will then be written to a file at the end
    s = ''
    s = _write_to_string(s, '----------- Time Series Analysis for ' + feature +
                         ' from ' + str(df['Date'][0]) + ' to ' +
                         str(df['Date'][len(df['Date']) - 1]) + '-----------')

    # only look at the feature of interest as a univariate time series;
    # the x-axis is the time
    X = np.array(df['Date'], dtype=np.datetime64)
    # df['First Difference'] = df[feature] - df[feature].shift()
    y = np.array(df[feature] - df[feature].shift())
    _draw_multiple_line_plot('first_difference.html', feature, [X], [y],
                             ['navy'], ['packets percentage delta'], [None],
                             [1], 'datetime', 'Date',
                             'Packets Percentage Delta', y_start=-100, y_end=100)

    # calculate autocorrelation and partial autocorrelation for the first difference
    lag_correlations = acf(y[1:])
    lag_partial_correlations = pacf(y[1:])
    logger.info('lag_correlations')
    logger.info(lag_correlations)
    s = _write_to_string(s, 'lag_correlations')
    s = _write_to_string(s, str(lag_correlations))
    y = lag_correlations
    _draw_multiple_line_plot('lag_correlations.html', 'lag_correlations', [X],
                             [y], ['navy'], ['lag_correlations'], [None], [1],
                             'datetime', 'Date', 'lag_correlations',
                             y_start=-1, y_end=1)
    logger.info('lag_partial_correlations')
    logger.info(lag_partial_correlations)
    s = _write_to_string(s, 'lag_partial_correlations')
    s = _write_to_string(s, str(lag_partial_correlations))
    y = lag_partial_correlations
    _draw_multiple_line_plot('lag_partial_correlations.html',
                             'lag_partial_correlations', [X], [y], ['navy'],
                             ['lag_partial_correlations'], [None], [1],
                             'datetime', 'Date', 'lag_partial_correlations',
                             y_start=-1, y_end=1)

    # seasonal decompose to extract seasonal trends
    decomposition = seasonal_decompose(np.array(df[feature]),
                                       model='additive', freq=15)
    _draw_decomposition_plot('decomposition.html', X, decomposition,
                             'seasonal decomposition', 'datetime',
                             'decomposition', width=600, height=400)

    # run various ARIMA models and see which fits best
    s, model_names, models, results, MAE = _try_ARIMA_and_ARMA_models(s, df, feature)

    # check that we got consistent output; all 4 variables returned by the
    # previous function are lists and should all have the same length
    len_list = [len(model_names), len(models), len(results), len(MAE)]
    if len(len_list) == len_list.count(len_list[0]):
        # looks consistent, all lengths are equal
        logger.info('_try_ARIMA_models output looks consistent, returns %d models ' % len(model_names))
    else:
        logger.info('_try_ARIMA_models output IS NOT consistent, returns %d model names ' % len(model_names))
        logger.info(len_list)
        logger.info('EXITING.....')
        sys.exit()

    s, predicted_dates, predicted, model_selection_list = _do_forecasts(
        df, feature, X, s, model_names, models, results, MAE)

    # write everything to file
    with open(feature + '.txt', "w") as text_file:
        text_file.write(s)

    # go back to the parent directory
    os.chdir(curr_dir)

    # return the results
    return feature, model_names, models, results, MAE, predicted_dates, predicted, model_selection_list
                 upper_series, color='k', alpha=.15)
plt.title("SARIMA - Final Forecast of a10 - Drug Sales")
plt.show()

# SARIMAX model with exogenous variable:
# as an example, use the seasonal index from the last 36 months
# to see how the model looks when we force the recent seasonal trend

# Compute Seasonal Index
from statsmodels.tsa.seasonal import seasonal_decompose
from dateutil.parser import parse

# multiplicative seasonal component
result_mul = seasonal_decompose(data['value'][-36:],  # 3 years
                                model='multiplicative',
                                extrapolate_trend='freq')
seasonal_index = result_mul.seasonal[-12:].to_frame()
seasonal_index['month'] = pd.to_datetime(seasonal_index.index).month

# merge with the base data
data['month'] = data.index.month
df = pd.merge(data, seasonal_index, how='left', on='month')
df.columns = ['value', 'month', 'seasonal_index']
df.index = data.index  # reassign the index

sxmodel = pm.auto_arima(df[['value']], exogenous=df[['seasonal_index']],
                        start_p=1, start_q=1,
                        test='adf',
                        max_p=3, max_q=3, m=12,
ts_log_moving_avg_diff.dropna(inplace=True)  # pandas in action :p
# After the above, make sure that the test statistic is less than the critical
# value. For this you can run is_stationary again:
# is_stationary(ts_log_moving_avg_diff, 12)

# Exponential weights make sure that recent observations have more importance
expwighted_avg = ts_log.ewm(halflife=12).mean()
ts_log_ewma_diff = ts_log - expwighted_avg
# test_stationarity(ts_log_ewma_diff)
# On testing, apparently this has a lower test statistic value and is hence
# better as a stationary series

from statsmodels.tsa.seasonal import seasonal_decompose
decomposition = seasonal_decompose(ts_log)

trend = decomposition.trend
seasonal = decomposition.seasonal
residual = decomposition.resid

plt.subplot(411)
plt.plot(ts_log, label="Original")
plt.legend(loc="best")
plt.subplot(412)
plt.plot(trend, label="Trend")
plt.legend(loc="best")
plt.subplot(413)
plt.plot(seasonal, label="Seasonality")
plt.legend(loc="best")
plt.subplot(414)
plt.ylabel('# Shirts sold')
plt.xlabel('Date')
plt.plot(train, label="training set", color=TSC)
plt.plot(valid, label="validation set", color=VSC, linestyle='--')
plt.plot(rolmean, color=OLC, label='Rolling Mean', linewidth=3)
plt.plot(rolstd, color=OLC, label='Rolling Std', linestyle='--', linewidth=3)
plt.legend(loc='best')
plt.show()

mt.ac_pac_function(train, lags=400)

#%%
# Decompose the series with a period of 365 or 183 days (year and half_year)
result = seasonal_decompose(train, model='additive', period=season,
                            extrapolate_trend='freq')

#%%
trend = result.trend
seasonality = result.seasonal
residuals = result.resid

strength_seasonal = max(0, 1 - residuals.var() / (seasonality + residuals).var())
print('The strength of the seasonality with period {} is: {}'.format(season, strength_seasonal))

plt.figure(figsize=(40, 20), dpi=80)
plt.plot(trend)
plt.figure(figsize=(40, 20), dpi=80)
plt.plot(residuals)
plt.figure(figsize=(40, 20), dpi=80)
def test_ndarray(self):
    res_add = seasonal_decompose(self.data.values, freq=4)
    seasonal = [62.46, 86.17, -88.38, -60.25, 62.46, 86.17, -88.38, -60.25,
                62.46, 86.17, -88.38, -60.25, 62.46, 86.17, -88.38, -60.25,
                62.46, 86.17, -88.38, -60.25, 62.46, 86.17, -88.38, -60.25,
                62.46, 86.17, -88.38, -60.25, 62.46, 86.17, -88.38, -60.25]
    trend = [np.nan, np.nan, 159.12, 204.00, 221.25, 245.12, 319.75, 451.50,
             561.12, 619.25, 615.62, 548.00, 462.12, 381.12, 316.62, 264.00,
             228.38, 210.75, 188.38, 199.00, 207.12, 191.00, 166.88, 72.00,
             -9.25, -33.12, -36.75, 36.25, 103.00, 131.62, np.nan, np.nan]
    random = [np.nan, np.nan, 78.254, 70.254, -36.710, -94.299, -6.371,
              -62.246, 105.415, 103.576, 2.754, 1.254, 15.415, -10.299,
              -33.246, -27.746, 46.165, -57.924, 28.004, -36.746, -37.585,
              151.826, -75.496, 86.254, -10.210, -194.049, 48.129, 11.004,
              -40.460, 143.201, np.nan, np.nan]
    assert_almost_equal(res_add.seasonal, seasonal, 2)
    assert_almost_equal(res_add.trend, trend, 2)
    assert_almost_equal(res_add.resid, random, 3)

    res_mult = seasonal_decompose(np.abs(self.data.values), 'm', freq=4)
    seasonal = [1.0815, 1.5538, 0.6716, 0.6931, 1.0815, 1.5538, 0.6716,
                0.6931, 1.0815, 1.5538, 0.6716, 0.6931, 1.0815, 1.5538,
                0.6716, 0.6931, 1.0815, 1.5538, 0.6716, 0.6931, 1.0815,
                1.5538, 0.6716, 0.6931, 1.0815, 1.5538, 0.6716, 0.6931,
                1.0815, 1.5538, 0.6716, 0.6931]
    trend = [np.nan, np.nan, 171.62, 204.00, 221.25, 245.12, 319.75, 451.50,
             561.12, 619.25, 615.62, 548.00, 462.12, 381.12, 316.62, 264.00,
             228.38, 210.75, 188.38, 199.00, 207.12, 191.00, 166.88, 107.25,
             80.50, 79.12, 78.75, 116.50, 140.00, 157.38, np.nan, np.nan]
    random = [np.nan, np.nan, 1.29263, 1.51360, 1.03223, 0.62226, 1.04771,
              1.05139, 1.20124, 0.84080, 1.28182, 1.28752, 1.08043, 0.77172,
              0.91697, 0.96191, 1.36441, 0.72986, 1.01171, 0.73956, 1.03566,
              1.44556, 0.02677, 1.31843, 0.49390, 1.14688, 1.45582, 0.16101,
              0.82555, 1.47633, np.nan, np.nan]
    assert_almost_equal(res_mult.seasonal, seasonal, 4)
    assert_almost_equal(res_mult.trend, trend, 2)
    assert_almost_equal(res_mult.resid, random, 4)

    # test odd
    res_add = seasonal_decompose(self.data.values[:-1], freq=4)
    seasonal = [68.18, 69.02, -82.66, -54.54, 68.18, 69.02, -82.66, -54.54,
                68.18, 69.02, -82.66, -54.54, 68.18, 69.02, -82.66, -54.54,
                68.18, 69.02, -82.66, -54.54, 68.18, 69.02, -82.66, -54.54,
                68.18, 69.02, -82.66, -54.54, 68.18, 69.02, -82.66]
    trend = [np.nan, np.nan, 159.12, 204.00, 221.25, 245.12, 319.75, 451.50,
             561.12, 619.25, 615.62, 548.00, 462.12, 381.12, 316.62, 264.00,
             228.38, 210.75, 188.38, 199.00, 207.12, 191.00, 166.88, 72.00,
             -9.25, -33.12, -36.75, 36.25, 103.00, np.nan, np.nan]
    random = [np.nan, np.nan, 72.538, 64.538, -42.426, -77.150, -12.087,
              -67.962, 99.699, 120.725, -2.962, -4.462, 9.699, 6.850,
              -38.962, -33.462, 40.449, -40.775, 22.288, -42.462, -43.301,
              168.975, -81.212, 80.538, -15.926, -176.900, 42.413, 5.288,
              -46.176, np.nan, np.nan]
    assert_almost_equal(res_add.seasonal, seasonal, 2)
    assert_almost_equal(res_add.trend, trend, 2)
    assert_almost_equal(res_add.resid, random, 3)
                     fs=FS, nfft=256, window=('tukey', 0.25),
                     detrend='constant', nperseg=60, noverlap=30,
                     scaling='density')
plt.pcolormesh(t, freq, Sxx)
plt.xlabel('Time [months]')
plt.ylabel('Frequency [1/month]')
plt.colorbar().set_label('Power spectral density [V^2/(1/month)]')
f.savefig(PATH_TO_PLOTS + '/spectrogram.pdf', bbox_inches='tight')
plt.show()

# signal decomposition = trend + seasonal + error
decomposition = seasonal_decompose(series_monthly, model="additive")
f = decomposition.plot()
f.savefig(PATH_TO_PLOTS + '/decomposition.pdf', bbox_inches='tight')
plt.show()

# train-test split
train = series_monthly.loc[series_monthly.index < SPLIT_DATE]
test = series_monthly.loc[series_monthly.index >= SPLIT_DATE]
print('Train size = {} %'.format(100 * len(train) / len(series_monthly)))
print('Test size = {} %'.format(100 * len(test) / len(series_monthly)))

f = plt.figure()
plt.plot(train)
plt.plot(test)
plt.gcf().set_size_inches(10, plt.gcf().get_size_inches()[1])
plt.title('Train/test split')
plt.xlabel('Date')
# acf is the autocorrelation function and pacf is the partial ACF (works only for 1-D arrays)
# iloc is integer location, check pandas
lag_corr = acf(stock_data['Logged First Difference'].iloc[1:])
lag_partial_corr = pacf(stock_data['Logged First Difference'].iloc[1:])
#fig, ax = plt.subplots(figsize=(16, 12))
#ax.plot(lag_corr)
#pylab.show()

# To extract trends and seasonal patterns for TS analysis
from statsmodels.tsa.seasonal import seasonal_decompose
# set the frequency value right; for monthly, set freq = 30
decomposition = seasonal_decompose(stock_data['Natural Log'],
                                   model='additive', freq=30)
#fig = decomposition.plot()
#pylab.show()

# let's fit some ARIMA; keep the first indicator as 1 and the rest as zero,
# i.e. (p, d, q) = (1, 0, 0)
# the snippet below does it for the undifferenced series
#model = sm.tsa.ARIMA(stock_data['Natural Log'].iloc[1:], order=(1, 0, 0))
#result = model.fit(disp=-1)
#stock_data['Forecast'] = result.fittedvalues
#stock_data[['Natural Log', 'Forecast']].plot(figsize=(16, 12))
#pylab.show()

# trying an exponential smoothing model
model = sm.tsa.ARIMA(stock_data['Logged First Difference'].iloc[1:],
                     order=(0, 0, 1))
results = model.fit(disp=-1)
from pyramid.arima import auto_arima
from statsmodels.tsa.seasonal import seasonal_decompose

# Import data
data = pd.read_csv("data/industrial_production.csv", index_col=0)

# Formatting
data.index = pd.to_datetime(data.index, format='%Y-%m-%d')

# Visualize
ax = data.plot()
fig = ax.get_figure()
fig.savefig("output/arima_raw_data_line_plot.png")

# Decomposition plot
result = seasonal_decompose(data, model='multiplicative')
fig = result.plot()
fig.savefig("output/seasonal_decompose_plot.png")

# Perform Seasonal ARIMA
stepwise_model = auto_arima(data,
                            start_p=1, d=1, start_q=1,
                            max_p=1, max_d=1, max_q=1,
                            start_P=1, D=1, start_Q=1, max_P=1,
# multiplicative decomposition of a contrived multiplicative time series
from matplotlib import pyplot
from statsmodels.tsa.seasonal import seasonal_decompose

series = [i**2.0 for i in range(1, 100)]
result = seasonal_decompose(series, model='multiplicative', freq=1)
result.plot()
pyplot.show()
# Moving average
movingAverage = indexedDataset_logScale.rolling(window=12).mean()
movingSTD = indexedDataset_logScale.rolling(window=12).std()
plt.plot(indexedDataset_logScale)
plt.plot(movingAverage, color='red')

datasetLogScaleMinusMovingAverage = indexedDataset_logScale - movingAverage
datasetLogScaleMinusMovingAverage.head(12)

# remove NaN values
datasetLogScaleMinusMovingAverage.dropna(inplace=True)
datasetLogScaleMinusMovingAverage.head(10)

# Plotting trend, seasonal and residual error
from statsmodels.tsa.seasonal import seasonal_decompose
decomposition = seasonal_decompose(indexedDataset_logScale, freq=1)

trend = decomposition.trend
seasonal = decomposition.seasonal
residual = decomposition.resid

plt.subplot(411)
plt.plot(indexedDataset_logScale, label='Original')
plt.legend(loc='best')
plt.subplot(412)
plt.plot(trend, label='Trend')
plt.legend(loc='best')
plt.subplot(413)
plt.plot(seasonal, label='Seasonality')
plt.legend(loc='best')
plt.subplot(414)
naive_errors = get_cv_errors(utility_index_cv_splits, naive_predictions)
print("Naive errors:", naive_errors)
plot_cv_predictions(naive_predictions)

average_predictions = make_cv_predictions(utility_index_cv_splits, average_prediction)
average_errors = get_cv_errors(utility_index_cv_splits, average_predictions)
print("Average errors:", average_errors)
plot_cv_predictions(average_predictions)

sarima_order_kwargs = {"order": (1, 1, 1), "seasonal_order": (1, 1, 1, 12)}
sarima_predictions = make_cv_predictions(utility_index_cv_splits, sarima_prediction,
                                         **sarima_order_kwargs)
sarima_errors = get_cv_errors(utility_index_cv_splits, sarima_predictions)
print("SARIMA errors:", sarima_errors)
plot_cv_predictions(sarima_predictions)

sarima_extrapolation = sarima_prediction(utility_index_df, 80, **sarima_order_kwargs)
plt.plot(sarima_extrapolation.index, sarima_extrapolation["value"], color="g")

utility_index_additive_decomposition = statsmodels_seasonal.seasonal_decompose(
    utility_index_df, model="additive", period=12)
utility_index_additive_decomposition.plot()

utility_index_multiplicative_decomposition = statsmodels_seasonal.seasonal_decompose(
    utility_index_df, model="multiplicative", period=12)
utility_index_multiplicative_decomposition.plot()

plt.show()
from scipy import stats
import statsmodels.api as sm
from statsmodels.graphics.api import qqplot
from statsmodels.tsa.stattools import adfuller
# functions to calculate MAE, RMSE
from sklearn.metrics import mean_absolute_error, mean_squared_error

df = pd.read_csv('chapter3//TS.csv')
ts = pd.Series(list(df['Sales']),
               index=pd.to_datetime(df['Month'], format='%Y-%m'))

from statsmodels.tsa.seasonal import seasonal_decompose
decomposition = seasonal_decompose(ts)
trend = decomposition.trend
seasonal = decomposition.seasonal
residual = decomposition.resid

ts_log = np.log(ts)
ts_log.dropna(inplace=True)
s_test = adfuller(ts_log, autolag='AIC')
print("Log transform stationary check p value: ", s_test[1])

s_test = adfuller(ts, autolag='AIC')
# extract the p value from the test results
print("p value > 0.05 means data is non-stationary: ", s_test[1])
                                              subplot_spec=outer_grid[0],
                                              wspace=0.0, hspace=0.0)
ax = fig.add_subplot(inner_grid[0])
ax.axhline(y=0, c='black', lw=0.5)

baseline_mean = np.array([np.mean(b) for b in B])
baseline_mean_norm = \
    (baseline_mean - np.min(baseline_mean)) / np.max(baseline_mean)

T = []
for i in range(len(C)):
    result_mul = seasonal_decompose(C[i], period=21, model='multiplicative',
                                    extrapolate_trend='freq')
    t = np.array(result_mul.trend)
    t = t - t[0]
    T.append(t)
    c = 'blue'
    alpha = 0.25
    if t[0] < t[-1]:
        c = 'red'
        alpha = 0.5
    ax.plot(range(len(t)), t, '-', lw=baseline_mean_norm[i], alpha=alpha, c=c)

T_mean = np.average(T, axis=0, weights=baseline_mean_norm)
def dftest(series):
    res = adfuller(series)
    p = res[1]
    return p

df = pd.read_csv('milk.csv')
df.columns = ['Month', 'Qty']
df.dropna(inplace=True)
df.set_index('Month', inplace=True)
df.index = pd.to_datetime(df.index)

from statsmodels.tsa.seasonal import seasonal_decompose
df_decomposed = seasonal_decompose(df['Qty'], model='multiplicative')
f = df_decomposed.plot()

df['First Difference'] = df['Qty'] - df['Qty'].shift(1)
df.dropna(inplace=True)
df['Seasonal FD'] = df['First Difference'] - df['First Difference'].shift(12)
df.dropna(inplace=True)
plt.plot(df['Seasonal FD'])
print(dftest(df['Seasonal FD']))

from statsmodels.tsa.stattools import acf, pacf
acfgraph = acf(df['Seasonal FD'], nlags=5)
pacfgraph = pacf(df['Seasonal FD'], nlags=5)
plt.plot(acfgraph)
                                   adjust=True).mean()
df_log_exp_decay = df_log - rolling_mean_exp_decay
df_log_exp_decay.dropna(inplace=True)
get_stationarity(df_log_exp_decay)

# In[69]:

df_log_shift = df_log - df_log.shift()
df_log_shift.dropna(inplace=True)
get_stationarity(df_log_shift)

# In[74]:

decomposition = seasonal_decompose(df_log, freq=100)
model = ARIMA(df_log, order=(2, 1, 2))
results = model.fit(disp=-1)
plt.plot(df_log_shift)
plt.plot(results.fittedvalues, color='red')

# In[78]:

predictions_ARIMA_diff = pd.Series(results.fittedvalues, copy=True)
predictions_ARIMA_diff_cumsum = predictions_ARIMA_diff.cumsum()
predictions_ARIMA_log = pd.Series(df_log.iloc[0], index=df_log.index)
predictions_ARIMA_log = predictions_ARIMA_log.add(
    predictions_ARIMA_diff_cumsum, fill_value=0)
predictions_ARIMA = np.exp(predictions_ARIMA_log)
plt.plot(df['Price'])
plt.plot(predictions_ARIMA)
# two_sided=True, extrapolate_trend=0) is the statsmodels library
# implementation of the naive, or classical, decomposition method.
# It breaks a time series down into 4 graphs: observed (or original), trend
# (whether the graph tends to go up or down), seasonal (repeating short-term
# cycles), and residual (or noise).
# model can be either 'additive' or 'multiplicative'. Both will give a result,
# so to determine which one to use, look at a df.plot() of the observed
# values. If the magnitude of the seasonal variations appears to increase over
# time, it is multiplicative. If it stays the same, it is additive. It is
# possible to transform data into being additive by using sqrt for a quadratic
# trend or ln for an exponential trend. In practice I fail to see the
# difference in the result.
# Furthermore, more advanced decomposition methods are advised over this one.
# Later versions of statsmodels include STL decomposition under:
# from statsmodels.tsa.seasonal import STL
# (a short STL sketch follows below). There is also the facebook prophet package.
decompose = seasonal_decompose(data['DOW'], model='additive')

# You may plot the 4 graphs individually by using:
# decompose.observed.plot()
# decompose.trend.plot()
# decompose.seasonal.plot()
# decompose.resid.plot()
decompose.plot()
plt.show()
plt.close()

# We can tell a number of things from the seasonal_decompose plots. An upward
# trend means the data is not stationary, meaning the mean and variance are
# not constant over time. This matters for ARIMA models: having a constant
# expected value unaffected by trend makes the series easier to model. We can
# confirm stationarity using the Augmented Dickey-Fuller test.
# adfuller(x, maxlag=None, regression='c', autolag='AIC', store=False,
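# The note above mentions STL as a more advanced alternative. A minimal,
# hedged sketch: it assumes statsmodels >= 0.11 and that data['DOW'] is a
# pandas Series with a regular index; period=12 is a placeholder -- substitute
# the real seasonal period of the data.
from statsmodels.tsa.seasonal import STL

stl_result = STL(data['DOW'], period=12, robust=True).fit()  # robust fitting down-weights outliers
stl_result.plot()
plt.show()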
decomp = decomp.set_index(pd.DatetimeIndex(decomp['Date']))

# In[65]:

# interpolate missing values
#decomp['LogSales'].interpolate(inplace=True)

# In[75]:

# decompose time series
from statsmodels.tsa.seasonal import seasonal_decompose
decomposition = seasonal_decompose(decomp['LogSales'], model='additive', freq=12)
decomposition2 = seasonal_decompose(decomp['Customers'], model='additive', freq=12)

# In[37]:

#%pylab inline
#fig = plt.figure()
#fig = decomposition.plot()

# In[ ]:
samples = SequenceDinucProperties(npath, ppath)
X = samples.getX()
print(X.shape)
print('>>>')

tmp = X[:, 0, 30, 0]
print(tmp.shape)
print(tmp)
plt.hist(tmp)

sample = X[7, :, :, :]
sample = sample.reshape(sample.shape[2], sample.shape[1])
print(sample.shape)
print(sample)

diff = list()
for i in range(1, sample.shape[1]):
    val = sample[1, i] - sample[1, i - 1]
    diff.append(val)
plt.plot(sample[1, :])
plt.plot(diff)

result = seasonal_decompose(sample[1, :], model='additive', freq=1)
#print(result.trend)
#print(result.seasonal)
#print(result.resid)
#print(result.observed)
result.plot()
print('=' * 10)
print(x)

# seasonal decompose to extract seasonal trends
X = []
y = []
labels = []
line_width = []
dash_type = []
color_palette = ['Pink', 'Red', 'Orange', 'Yellow', 'Brown',
                 'Green', 'Cyan', 'Blue', 'Purple', 'Black']
c = []
for i in range(len(df2)):
    if df2['name'][i] == 'https' or df2['name'][i] == '0-100':
        continue
    decomposition = seasonal_decompose(np.array(df[df2['name'][i]]),
                                       model='additive', freq=5)
    X.append(date_series)
    y.append(decomposition.trend)
    labels.append(df2['name'][i])
    line_width.append(2)
    dash_type.append(None)
    c.append(color_palette[i % len(color_palette)])

_draw_multiple_line_plot('growth_trends.html', 'growth_trend', X, y, c,
                         labels, dash_type, line_width,
    result = [series[0]]  # first value is the same as the series
    for n in range(1, len(series)):
        result.append(series[:n + 1].mean())
    return result

df['cumulative'] = cumulative(df['sales'])
df['cum2'] = df['sales'].mean()
df['moving_average'] = df['sales'].rolling(window=10).mean()

plt.style.use('fivethirtyeight')
ax = df.plot(figsize=(18, 6), fontsize=14)
plt.title("Raw data trend plot")
plt.show()

rcParams['figure.figsize'] = 18, 6
result_a = seasonal_decompose(data, model='additive')
fig = result_a.plot()
plt.title("Additive decomposition plot")
plt.show()

rcParams['figure.figsize'] = 18, 6
result_m = seasonal_decompose(data, model='multiplicative')
fig = result_m.plot()
plt.title("Multiplicative decomposition plot")
plt.show()

################################################################
# Simple Exponential Smoothing (level)
'''
model:    x_t = a + eps_t
forecast: xhat_{t,t+1} = alpha * x_t + (1 - alpha) * xhat_{t-1,t}
'''
bus.set_index(['index'], inplace=True)
bus.index.name = None
len(date_list)  # check
len(bus.index)  # check

# riders
bus.columns = ['riders']
# df['riders'] = df.riders.apply(lambda x: int(x)*100)
bus['riders'] = bus.riders.apply(lambda x: int(x))
bus.riders
bus.riders.plot(figsize=(12, 8), title='Monthly Ridership (100,000s)',
                fontsize=14)
# plt.savefig('month_ridership.png', bbox_inches='tight')  # optional save

decomposition = seasonal_decompose(bus.riders, freq=12)
fig = plt.figure()
fig = decomposition.plot()
fig.set_size_inches(15, 8)
# plt.savefig('seasonal.png', bbox_inches='tight')  # optional save

# grab just one graphic by doing the following:
seasonal = decomposition.seasonal
seasonal.plot()

# define Dickey-Fuller test
from statsmodels.tsa.stattools import adfuller

def test_stationarity(timeseries):
    # Determine rolling statistics
    rolmean = timeseries.rolling(window=12).mean()
# ## Decomposing
# Both trend and seasonality are modelled separately and the remaining part
# of the series is returned. For more details watch these videos:
#
# Seasonal Decomposition and Forecasting:
#
# 1. https://www.youtube.com/watch?v=85XU1T9DIps (Part I)
# 2. https://www.youtube.com/watch?v=CfB9ROwF2ew (Part II)

# In[32]:

from statsmodels.tsa.seasonal import seasonal_decompose

# In[33]:

decomposition = seasonal_decompose(ts_log, freq=700)

# In[34]:

trend = decomposition.trend
seasonal = decomposition.seasonal
residual = decomposition.resid

# In[35]:

plt.subplot(411)
plt.plot(ts_log, label='Original')
plt.legend(loc='best')

# In[36]:
#my_plot = port_df_mean['attributes_bytes'].plot.hist()
my_plot = calc_ent2['entropy'].plot.hist()

# In[25]:

import matplotlib.pyplot as pplt
#my_plot = pplt.plot(port_df_mean['attributes_bytes'])
my_plot = pplt.plot(calc_ent2['entropy'])
pplt.autoscale(enable=True, axis='x', tight=None)
pplt.show()

# In[26]:

decomposition = seasonal_decompose(calc_ent2.entropy.values, freq=24)
fig = plt.figure()
fig = decomposition.plot()
fig.set_size_inches(15, 8)

# In[44]:

# The endogenous variable needs to be type float or you get a cast error
model = ARIMA(calc_ent2['entropy'], (1, 0, 0))
model_fit = model.fit()  # fit is a function
model_fitted = model_fit.fittedvalues  # fittedvalues is a Series
print(model_fit.summary())
print(model_fitted)

# In[29]:
airline.plot(figsize=(10, 8))
airline['EWMA-12'] = airline['#Passengers'].ewm(span=12).mean()
airline[["#Passengers", "EWMA-12"]].plot()

## ETS (Error-Trend-Seasonality) MODELS
## Exponential Smoothing
## Trend Methods Models
## ETS Decomposition
airline.plot()

from statsmodels.tsa.seasonal import seasonal_decompose
result = seasonal_decompose(airline['#Passengers'], model='multiplicative')
result.seasonal.plot()
result.trend.plot()
result.plot()

## ARIMA MODELS
## Step 1
df = pd.read_csv('monthly-milk-production.csv')
df.head()
df.columns = ["Month", "Milk in pounds per cow"]
df.head()
df.tail()
## to drop a row
def diagnostics():
    decomposition = seasonal_decompose(view_hour['distinct_freq_sum'].values,
                                       freq=24)
    fig = decomposition.plot()
    fig.set_size_inches(50, 8)
def diagnostics():
    decomposition = seasonal_decompose(voltage_df['rel_counts'].values,
                                       freq=24)
    fig = decomposition.plot()
    fig.set_size_inches(50, 8)
def test_one_sided_moving_average_in_stl_decompose(self):
    res_add = seasonal_decompose(self.data.values, freq=4, two_sided=False)

    seasonal = np.array([76.76, 90.03, -114.4, -52.4, 76.76, 90.03, -114.4,
                         -52.4, 76.76, 90.03, -114.4, -52.4, 76.76, 90.03,
                         -114.4, -52.4, 76.76, 90.03, -114.4, -52.4, 76.76,
                         90.03, -114.4, -52.4, 76.76, 90.03, -114.4, -52.4,
                         76.76, 90.03, -114.4, -52.4])
    trend = np.array([np.nan, np.nan, np.nan, np.nan, 159.12, 204., 221.25,
                      245.12, 319.75, 451.5, 561.12, 619.25, 615.62, 548.,
                      462.12, 381.12, 316.62, 264., 228.38, 210.75, 188.38,
                      199., 207.12, 191., 166.88, 72., -9.25, -33.12,
                      -36.75, 36.25, 103., 131.62])
    resid = np.array([np.nan, np.nan, np.nan, np.nan, 11.112, -57.031,
                      118.147, 136.272, 332.487, 267.469, 83.272, -77.853,
                      -152.388, -181.031, -152.728, -152.728, -56.388,
                      -115.031, 14.022, -56.353, -33.138, 139.969, -89.728,
                      -40.603, -200.638, -303.031, 46.647, 72.522, 84.987,
                      234.719, -33.603, 104.772])

    assert_almost_equal(res_add.seasonal, seasonal, 2)
    assert_almost_equal(res_add.trend, trend, 2)
    assert_almost_equal(res_add.resid, resid, 3)

    res_mult = seasonal_decompose(np.abs(self.data.values), 'm', freq=4,
                                  two_sided=False)

    seasonal = np.array([1.1985, 1.5449, 0.5811, 0.6755, 1.1985, 1.5449,
                         0.5811, 0.6755, 1.1985, 1.5449, 0.5811, 0.6755,
                         1.1985, 1.5449, 0.5811, 0.6755, 1.1985, 1.5449,
                         0.5811, 0.6755, 1.1985, 1.5449, 0.5811, 0.6755,
                         1.1985, 1.5449, 0.5811, 0.6755, 1.1985, 1.5449,
                         0.5811, 0.6755])
    trend = np.array([np.nan, np.nan, np.nan, np.nan, 171.625, 204., 221.25,
                      245.125, 319.75, 451.5, 561.125, 619.25, 615.625, 548.,
                      462.125, 381.125, 316.625, 264., 228.375, 210.75,
                      188.375, 199., 207.125, 191., 166.875, 107.25, 80.5,
                      79.125, 78.75, 116.5, 140., 157.375])
    resid = np.array([np.nan, np.nan, np.nan, np.nan, 1.2008, 0.752, 1.75,
                      1.987, 1.9023, 1.1598, 1.6253, 1.169, 0.7319, 0.5398,
                      0.7261, 0.6837, 0.888, 0.586, 0.9645, 0.7165, 1.0276,
                      1.3954, 0.0249, 0.7596, 0.215, 0.851, 1.646, 0.2432,
                      1.3244, 2.0058, 0.5531, 1.7309])

    assert_almost_equal(res_mult.seasonal, seasonal, 4)
    assert_almost_equal(res_mult.trend, trend, 2)
    assert_almost_equal(res_mult.resid, resid, 4)

    # test odd
    res_add = seasonal_decompose(self.data.values[:-1], freq=4,
                                 two_sided=False)
    seasonal = np.array([81.21, 94.48, -109.95, -65.74, 81.21, 94.48,
                         -109.95, -65.74, 81.21, 94.48, -109.95, -65.74,
                         81.21, 94.48, -109.95, -65.74, 81.21, 94.48,
                         -109.95, -65.74, 81.21, 94.48, -109.95, -65.74,
                         81.21, 94.48, -109.95, -65.74, 81.21, 94.48,
                         -109.95])
    trend = [np.nan, np.nan, np.nan, np.nan, 159.12, 204., 221.25, 245.12,
             319.75, 451.5, 561.12, 619.25, 615.62, 548., 462.12, 381.12,
             316.62, 264., 228.38, 210.75, 188.38, 199., 207.12, 191.,
             166.88, 72., -9.25, -33.12, -36.75, 36.25, 103.]
    random = [np.nan, np.nan, np.nan, np.nan, 6.663, -61.48, 113.699,
              149.618, 328.038, 263.02, 78.824, -64.507, -156.837, -185.48,
              -157.176, -139.382, -60.837, -119.48, 9.574, -43.007, -37.587,
              135.52, -94.176, -27.257, -205.087, -307.48, 42.199, 85.868,
              80.538, 230.27, -38.051]

    assert_almost_equal(res_add.seasonal, seasonal, 2)
    assert_almost_equal(res_add.trend, trend, 2)
    assert_almost_equal(res_add.resid, random, 3)
    return dt

# dataset = read_csv('Load/Task 1/L1-train.csv')
dataset = read_csv('train.csv')
data = dataset['w1']
#plt.plot(data)
#plt.show()

start = datetime.strptime("Jan 1 2001 1:00AM", "%b %d %Y %I:%M%p")
end = datetime.strptime("Oct 1 2010 12:00AM", "%b %d %Y %I:%M%p")
data.index = pd.date_range(start=start, end=end, freq='h')
data.to_csv('train_w1.csv')

decomposition = seasonal_decompose(data[:10 * 27], model="additive")
trend = decomposition.trend
seasonal = decomposition.seasonal
residual = decomposition.resid
trend.plot()
seasonal.plot()
residual.plot()

# Moving average plot
def draw_trend(timeSeries, size):
    f = plt.figure(facecolor='white')
    # moving average over `size` points
    rol_mean = timeSeries.rolling(window=size).mean()
    # weighted moving average over `size` points
# fontsize is just for the axes size
unq_rel_cnts1['distinct_freq'].loc[:].plot(figsize=(40, 8), fontsize=30)

# #### Execute some Univariate Statistics

# In[17]:

unq_rel_cnts1['distinct_freq'].describe()

# In[18]:

decomposition = seasonal_decompose(unq_rel_cnts1['distinct_freq'].values, freq=24)
fig = decomposition.plot()
fig.set_size_inches(15, 8)

# In[19]:

# Graph Autocorrelation and Partial Autocorrelation data
fig, axes = plt.subplots(1, 2, figsize=(15, 4))
fig = sm.graphics.tsa.plot_acf(unq_rel_cnts1['distinct_freq'], lags=12, ax=axes[0])
fig = sm.graphics.tsa.plot_pacf(unq_rel_cnts1['distinct_freq'], lags=12, ax=axes[1])
df = pd.DataFrame(df.price)
# asfreq('M') finds the last day of the month and takes its value; if there is
# no value (e.g. a weekend) it fills in NaN, so it is better to use the
# resample call that follows:
df.asfreq('M')
df = df.resample('M').last()

roll = 12
df['rolling_mean'] = df['price'].rolling(roll).mean()
df['rolling_std'] = df['price'].rolling(roll).std()
df.plot(title='gold price')

# a non-linear growth pattern can be observed -> use the multiplicative model
from statsmodels.tsa.seasonal import seasonal_decompose
decomposition = seasonal_decompose(df.price, model='multiplicative')
seasonal_decompose?
decomposition.plot()
dir(decomposition)
decomposition.resid.plot()
decomposition.trend.plot()
decomposition.seasonal.plot()

#%% Testing for stationarity in time series
import pandas as pd
from statsmodels.tsa.stattools import adfuller
import yfinance as yf
import statsmodels.tsa.api as smt
import matplotlib.pyplot as plt
def TSD(x):
    result = seasonal_decompose(x, model='additive', freq=1440)
    return result.trend, result.seasonal, result.resid
ts_log_diff = ts_log - ts_log.shift()
ts_log_diff.dropna(inplace=True)
plt.plot(ts_log_diff)

#%%
test_stationarity(ts_log_diff)
'''
It is stationary because:
• the mean and std show only small variations with time.
• the test statistic is less than the 10% critical value, so we can be
  90 percent confident that this is stationary.
'''

#%%
# Decomposing
decomp = seasonal_decompose(ts_log)
trend = decomp.trend
seasonal = decomp.seasonal
residual = decomp.resid

plt.subplot(411)
plt.plot(ts_log, label='Original')
plt.legend(loc='best')
plt.subplot(412)
plt.plot(trend, label='Trend')
plt.legend(loc='best')
plt.subplot(413)
plt.plot(seasonal, label='Seasonal')
plt.legend(loc='best')
plt.subplot(414)
plt.plot(residual, label='Residual')
Depending on the nature of the trend and seasonality, a time series can be
modeled as additive or multiplicative, wherein each observation in the series
is expressed as either a sum or a product of the components:

Additive time series:        Value = Base Level + Trend + Seasonality + Error
Multiplicative time series:  Value = Base Level x Trend x Seasonality x Error

If you look closely at the residuals of the multiplicative decomposition, some
pattern is left over. The additive decomposition, however, looks quite random,
which is good. So ideally, additive decomposition should be preferred for this
particular series (a quick reconstruction check follows below).
"""
result_add = seasonal_decompose(df['PASSENGER_SUM_DAY'], model='additive',
                                extrapolate_trend='freq')
result_mul = seasonal_decompose(df['PASSENGER_SUM_DAY'], model='multiplicative',
                                extrapolate_trend='freq')

result_mul.plot().suptitle('Multiplicative Decompose', fontsize=22)
plot.show()
plot.close()
result_add.plot().suptitle('Additive Decompose', fontsize=22)
plot.show()
plot.close()

# DESEASON THE VARIABLE
# A 7-day seasonal pattern is clearly visible
df['PASSENGER_SUM_DAY'] = np.log(df['PASSENGER_SUM_DAY']) - np.log(
    df['PASSENGER_SUM_DAY']).shift(7)
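# A quick sanity check of the identities quoted above -- a verification
# sketch, not part of the original analysis: the additive components should
# sum back to the observed series, and the multiplicative ones should multiply
# back to it, up to floating-point error. With extrapolate_trend='freq' there
# are no NaN edges, so the comparison covers the full series.
import numpy as np

recon_add = result_add.trend + result_add.seasonal + result_add.resid
recon_mul = result_mul.trend * result_mul.seasonal * result_mul.resid
assert np.allclose(recon_add, result_add.observed)
assert np.allclose(recon_mul, result_mul.observed)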
print('lag_partial_correlations')
print(lag_partial_correlations)
y = lag_partial_correlations
_draw_multiple_line_plot('lag_partial_correlations.html',
                         'lag_partial_correlations', [X], [y], ['navy'],
                         ['lag_partial_correlations'], [None], [1],
                         'datetime', 'Date', 'lag_partial_correlations',
                         y_start=-1, y_end=1)

decomposition = seasonal_decompose(np.array(df['https']), model='additive', freq=30)
_draw_decomposition_plot('decomposition.html', X, decomposition,
                         'seasonal decomposition', 'datetime',
                         'decomposition', width=600, height=400)

model = sm.tsa.ARIMA(np.array(df['https'].iloc[1:]), order=(2, 0, 0))
results = model.fit(disp=-1)

# predict the next 12 values
num_predictions = 12
predicted_dates = []
last_date = X[-1]
for i in range(num_predictions):
    next_date = last_date + 30
    predicted_dates.append(next_date)
    last_date = next_date
#predicted_dates = np.array(['2015-10-17', '2015-12-19', '2016-03-19', '2016-06-19', '2016-09-19'], dtype=np.datetime64)