def test_nsdiffs_corner_cases(tst):
    """Exercise the edge cases of ``nsdiffs`` for the seasonal test ``tst``.

    Covers three situations: ``max_D=0`` must raise, a constant series must
    yield zero seasonal differences, and ``m`` values of 0 or 1 must raise.
    """
    # max_D=0 is expected to be rejected with a ValueError.
    with pytest.raises(ValueError):
        nsdiffs(austres, m=2, max_D=0, test=tst)

    # A constant series is expected to need no seasonal differencing.
    constant_series = [1, 1, 1, 1]
    estimated = nsdiffs(constant_series, m=2, test=tst)
    assert estimated == 0

    # Seasonal periods of 0 and 1 are both expected to raise.
    for invalid_period in (0, 1):
        with pytest.raises(ValueError):
            nsdiffs(austres, m=invalid_period, test=tst)
def stationarity_tests(data):
    """Run several differencing-order estimators on a time series.

    Parameters:
        data: time series for which stationarity tests are performed;
            its ``.values`` attribute is what gets passed to the estimators.

    Returns:
        A dict with two entries. ``'usual_differencing'`` maps
        ``'ADF_test'``, ``'KPSS_test'`` and ``'PP_test'`` to the results of
        ``ndiffs``; ``'seasonal_differencing'`` maps ``'Canova-Hansen'`` and
        ``'OCSB'`` to the results of ``nsdiffs`` called with ``m=7`` and
        ``max_D=31``.
    """
    series = data.values

    # Regular differencing: one ndiffs call per unit-root test.
    regular = {}
    for label, test_name in (('ADF_test', 'adf'),
                             ('KPSS_test', 'kpss'),
                             ('PP_test', 'pp')):
        regular[label] = ndiffs(series, test=test_name)

    # Seasonal differencing: one nsdiffs call per seasonal test.
    seasonal = {}
    for label, test_name in (('Canova-Hansen', 'ch'),
                             ('OCSB', 'ocsb')):
        seasonal[label] = nsdiffs(series, m=7, max_D=31, test=test_name)

    return {'usual_differencing': regular,
            'seasonal_differencing': seasonal}
def auto_pmd(self, train, test):
    '''
    Summary Line: fit a seasonal auto_arima on ``train`` and forecast
    as many periods as ``test`` has entries.

    The regular differencing order comes from ``ndiffs`` (KPSS test) and
    the seasonal one from ``nsdiffs`` (OCSB test, m=52, max_D=12); both
    are handed to ``auto_arima`` as fixed ``d`` / ``D``.

    Returns the forecast as a numpy array.
    '''
    d_order = ndiffs(train, test='kpss')
    seasonal_d_order = nsdiffs(train, m=52, max_D=12, test='ocsb')

    # Search settings passed straight through to auto_arima.
    search_options = dict(
        start_p=0,
        start_q=0,
        max_p=5,
        max_q=5,
        m=52,
        start_P=0,
        seasonal=True,
        d=d_order,
        D=seasonal_d_order,
        suppress_warnings=True,
        stepwise=True,
        error_action='ignore',
        trace=False,
    )
    fitted_model = aa.auto_arima(train, **search_options)

    return np.array(fitted_model.predict(n_periods=len(test)))
def _check_season_length(self, season_length, target, kwargs):
    """Check if season_length is a working value for seasonality by performing
    the same test of seasonality pm.auto_arima does.

    The goal is for pm.auto_arima not to fail the training later.

    Args:
        season_length (int): Season length, always > 1.
        target (numpy.array): Target to train on. A copy is passed to the
            seasonality test.
        kwargs (dict): Kwargs dictionary of pm.auto_arima. The keys
            ``seasonal_test`` (default ``"ocsb"``), ``max_D`` (default 1) and
            ``seasonal_test_args`` (default empty) are read from it.

    Raises:
        ValueError: If ``nsdiffs`` raises for this season length. The
            original exception is attached as ``__cause__``.
    """
    logger.info(f"Check if seasonality 'm' can be set to {season_length}")
    try:
        nsdiffs(
            x=target.copy(),
            m=season_length,
            test=kwargs.get("seasonal_test", "ocsb"),
            max_D=kwargs.get("max_D", 1),
            **kwargs.get("seasonal_test_args", {}),
        )
    except Exception as e:
        # Chain explicitly so the traceback shows the nsdiffs failure as the
        # direct cause rather than as incidental context.
        raise ValueError(
            f"Seasonality of AutoARIMA can't be set to {season_length}. Error when testing seasonality with nsdiffs: {e}"
        ) from e
def test_issue_351():
    """Regression test named after issue 351.

    Runs the OCSB seasonal test with ``m=52`` and ``max_D=2`` on a short,
    mostly-zero series and expects ``D == 1`` together with exactly one
    ``UserWarning`` whose text contains 'shorter than m'.
    """
    y = np.array([
        1, 0, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 2, 1, 6, 2, 1, 0, 2, 0, 1, 0, 0, 3,
        1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 3, 0, 0, 6, 0, 0, 0, 0, 0, 1, 3, 0, 0,
        1, 0, 0, 0, 0, 0, 1, 1, 0, 0, 1, 1, 0
    ])

    with pytest.warns(UserWarning) as caught:
        seasonal_diffs = arima_utils.nsdiffs(y, m=52, max_D=2, test='ocsb')
        assert seasonal_diffs == 1

    # Exactly one warning, and it must mention the short-series condition.
    messages = pytest_warning_messages(caught)
    assert len(messages) == 1
    assert 'shorter than m' in messages[0]
def test_nsdiffs_on_wine():
    """``nsdiffs`` on ``wineind`` with ``m=52`` is expected to return 2."""
    estimated = nsdiffs(wineind, m=52)
    assert estimated == 2
# Build one frame per country holding PMI together with its exogenous
# variables. The US frame is assembled by successive left joins on the index.
df_us = pmi_us
for extra_series in (el_us, brent, wti):
    df_us = pd.merge(df_us, extra_series, how='left',
                     left_index=True, right_index=True)
df_us = df_us.dropna()

# ---------------------------------------------------------------
# Developing dynamic models (SARIMA with explanatory variables)
# ---------------------------------------------------------------

# Formally prove that only one differencing is needed
# (results are not stored; the calls are kept exactly as they were).
df_no = df_no.dropna()
ndiffs(df_no.pmi, test='adf')
nsdiffs(df_no.pmi, test='ch', m=12)

# Direction column in every frame: 1 if PMI rose versus the previous row,
# 0 otherwise (including the first row, where the difference is NaN).
for country_df in (df_no, df_dk, df_uk, df_us):
    pmi_change = country_df.pmi - country_df.pmi.shift(1)
    country_df['dir'] = [1 if step > 0 else 0 for step in pmi_change]

# Need to find ARIMA terms for all countries.
# Using exog with only previous periods (only lags).
n_test_obs = 24

# Norway: hold out the last n_test_obs rows, then strip the columns that
# must not be used as exogenous regressors.
non_exog_cols = ['dir', 'eur_per_MWh', 'pmi', 'usd_per_MWh',
                 'usd_per_barrel_x', 'usd_per_barrel_y']
df_no_train = df_no.iloc[:-n_test_obs, :]
df_no_test = df_no.iloc[-n_test_obs:, :]
exog_no_train = df_no_train.drop(non_exog_cols, axis=1)
exog_no_test = df_no_test.drop(non_exog_cols, axis=1)
def test_nsdiffs_on_various(data, test, m, expected):
    """``nsdiffs(data, m, test)`` with ``max_D=3`` must equal ``expected``."""
    estimated = nsdiffs(data, m=m, test=test, max_D=3)
    assert estimated == expected
#The easiest way to make your data stationary in the case of ARIMA models is to allow auto_arima to work its magic, estimate the appropriate d value, and difference the time series accordingly. However, other common transformations for enforcing stationarity include (sometimes in combination with one another): # #Square root or N-th root transformations #De-trending your time series #Differencing your time series one or more times #Log transformations #%%%% from pmdarima.datasets import load_lynx from pmdarima.arima.utils import nsdiffs # load lynx lynx = load_lynx() # estimate number of seasonal differences using a Canova-Hansen test D = nsdiffs(lynx, m=10, # commonly requires knowledge of dataset max_D=12, test='ch') # -> 0 # or use the OCSB test (by default) nsdiffs(lynx, m=10, max_D=12, test='ocsb') # -> 0 #%%%The m parameter is the number of observations per seasonal cycle, and is one that must be known apriori. Typically, m will correspond to some recurrent periodicity such as: #7 - daily, 12 - monthly ,52 - weekly #Depending on how it’s set, it can dramatically impact the outcome of an ARIMA model. For instance, consider the wineind dataset when fit with m=1 vs. m=12: import pmdarima as pm
def predict_arima(df):
    """Fit a seasonal ARIMA model on ``df`` and forecast the hold-out horizon.

    Steps, in order:

    1. Compare the forecast saved by the previous call (``forecast.pickle``)
       with the new observations and print over/under-estimation errors.
    2. Save PACF/ACF plots to ``pacf.png`` / ``acf.png``.
    3. Snapshot ``df`` to ``arima.pickle``.
    4. Estimate the regular (``nd``) and seasonal (``nsd``) differencing
       orders; both fall back to 1 if the tests fail.
    5. Derive ``p`` from the ACF values and ``q`` from the PACF values that
       reach ``getThreshold()`` (capped at 4 and 1 respectively).
    6. Fit ``ARIMA(order=(p, d, q), seasonal_order=(0, nsd, 0, 12))`` on the
       whole of ``df``, forecast ``len(test)`` periods, then store and plot
       the forecast.

    Parameters:
        df: data frame with a ``memory_used`` column.

    Returns:
        A DataFrame with a single ``prediction`` column indexed by the
        hold-out split's index, or ``None`` if fitting/forecasting raised.
    """
    time_in = current_milli_time()

    # --- 1. Score the forecast produced by the previous call, if any ------
    try:
        # NOTE(security): pickle.load can execute arbitrary code; only read
        # files written by this process.
        with open("forecast.pickle", "rb") as forecast_in:
            previous_forecast = pickle.load(forecast_in)

        # Compare only the overlapping prefix of both frames. The forecast
        # is stored below with a "prediction" column, so read that column.
        n_common = min(len(df), len(previous_forecast))
        error = (df[:n_common]["memory_used"]
                 - previous_forecast[:n_common]["prediction"])

        # Negative error: forecast above actual (overestimation).
        overshoots = [x for x in error if x < 0]
        undershoots = [x for x in error if x >= 0]
        # Guard the means: either list may be empty.
        overestimation = (sum(overshoots) / len(overshoots)
                          if overshoots else 0.0)
        underestimation = (sum(undershoots) / len(undershoots)
                           if undershoots else 0.0)
        print("UNDERESTIMATION ERROR: " + str(underestimation))
        print("OVERESTIMATION ERROR: " + str(overestimation))
        print("Mean Absolute Error in Last iteration " + str(error))
    except Exception:
        # Best effort: no previous forecast yet, or it could not be scored.
        print("RMSE To be computed")

    # --- 2. Diagnostic plots -------------------------------------------------
    try:
        pm.plot_pacf(df, show=False).savefig('pacf.png')
        pm.plot_acf(df, show=False).savefig('acf.png')
    except Exception:
        print("Data points insufficient for ACF & PACF")

    # --- 3. Snapshot the current data ----------------------------------------
    # NOTE(review): the earlier version first loaded arima.pickle and called
    # .append(df) on it, but never used the result; the file always ended up
    # holding just `df`, which is what is written here.
    with open("arima.pickle", "wb") as arima_data_out:
        pickle.dump(df, arima_data_out)

    # --- 4. Differencing orders ----------------------------------------------
    nd = 1
    nsd = 1
    try:
        adf_test = ADFTest(alpha=0.05)
        # Result unused; the call is kept so a failure here still triggers
        # the fallback below, as before.
        _p_val, _should_diff = adf_test.is_stationary(df["memory_used"])
        nd = ndiffs(df, test='adf')
        logging.info(nd)
        nsd = nsdiffs(df, 12)
        logging.info(nsd)
    except Exception:
        nd = 1
        print("Exception on tests")

    # The Canova-Hansen estimate, when it succeeds, overrides nsd.
    ch_test = CHTest(12)
    try:
        nsd = ch_test.estimate_seasonal_differencing_term(df)
    except Exception as e:
        print(e)
        logging.error(e)

    # --- 5. Find p, q dynamically --------------------------------------------
    acf_lags = acf(df["memory_used"])
    acf_lags_threshold = [x for x in acf_lags if x >= getThreshold()]
    p = min(len(acf_lags_threshold), 4)

    pacf_lags = pacf(df["memory_used"])
    pacf_lags_threshold = [x for x in pacf_lags if x >= getThreshold()]
    q = min(len(pacf_lags_threshold), 1)

    d = nd
    train, test = train_test_split(df, shuffle=False, test_size=0.3)

    # --- 6. Fit, forecast, persist, plot -------------------------------------
    # If data is seasonal set the values of P,D,Q in seasonal order
    stepwise_model = ARIMA(
        order=(p, d, q),
        seasonal_order=(0, nsd, 0, 12),
        suppress_warnings=True,
        scoring='mse'
    )
    x = str(p) + " " + str(nd) + " " + str(q)
    print("Model with p=" + str(p) + " d=" + str(d) + " q=" + str(q))
    try:
        stepwise_model.fit(df)
        # Vary the periods as per the forecasting window:
        #   n_periods=30 -> 5 mins, 60 -> 10 mins, 90 -> 15 mins
        future_forecast = stepwise_model.predict(n_periods=len(test))
        future_forecast = pd.DataFrame(future_forecast, index=test.index,
                                       columns=["prediction"])
        res = pd.concat([df, future_forecast], axis=1)

        # Save the forecast so the next call can score it.
        with open("forecast.pickle", "wb") as forecast_out:
            pickle.dump(future_forecast, forecast_out)

        trace1 = go.Scatter(x=res.index, y=res["prediction"],
                            name="Prediction", mode='lines')
        trace2 = go.Scatter(x=df.index, y=df["memory_used"],
                            name="DF data", mode='lines')
        data = [trace1, trace2]
        layout = go.Layout(
            title=x
        )
        fig = go.Figure(data=data, layout=layout)
        plot(fig, filename="prediction")
        print("Current values")
        print(df)
        print("Predicted Data Points")
        print(future_forecast)
        time_out = current_milli_time()
        print("TIME for RNN(ms):" + str(time_out - time_in))
        return future_forecast
    except Exception as e:
        time_out = current_milli_time()
        print("TIME for RNN(ms):" + str(time_out - time_in))
        print(e)
        return None
# Three stacked panels: ACF on the first axis, PACF on the second and the
# series itself on the third.
fig, ax = plt.subplots(3,1, figsize=(12,10))
# NOTE(review): ax[0] / ax[1] are rebound to whatever plot_acf / plot_pacf
# return, not kept as the original axes.
ax[0] = plot_acf(x.dropna(), lags=50, ax=ax[0])
ax[1] = plot_pacf(x.dropna(), lags=50, ax=ax[1])
ax[2].plot(x)
ax[2].set_title("Data")
plt.savefig('OUTFILES/M4-Sarima_Autocorr_saison_diff_12_ARA2.png', dpi=100, bbox_inches='tight')
plt.show()


# In[35]:


from pmdarima.arima.utils import nsdiffs

# estimate number of seasonal differences using an OCSB test (by default)
n_Docsb = nsdiffs(ara2,
            m=12,
            max_D=12,
            test='ocsb')
print("Nombre diff D = ",n_Docsb, " basé sur param OCSB")


# <font size=4 color="darkblue"><b>2. Model identification, estimation and validation</b></font>
#
# ### d = 0 & D = 1

# ### Determining the AR and MA "terms"
# We know that significant spikes still remain in the ACF and PACF correlograms.
# We therefore need to decide which AR and MA terms to add.
# >Build a search algorithm based on the possible combinations of AR and MA terms

# Possible values for each term p,q : $[0, 2]$, because it takes too long otherwise ... d = 0 / D = 1
#
# #### ESTIMATION
# %% modelo_busca.fit(cresc_p1['Preco'].values) # %% valores_preditos = modelo_busca.predict(n_periods=10) # %% plt.plot(cresc_p2['Data'], valores_preditos) plt.plot(cresc_p2['Data'], cresc_p2['Preco']) # %% from pmdarima.arima.utils import nsdiffs # %% D = nsdiffs(cresc_p1['Preco'].values, m=2, max_D=12, test='ch') # %% modelo_busca2 = auto_arima(cresc_p1['Preco'].values, start_p=0, start_q=0, max_p=6, max_q=6, d=1, D=1, start_Q=1, start_P=1, max_Q=4, max_P=4, m=2, seasonal=True,