Пример #1
0
def test_nsdiffs_corner_cases(tst):
    """Check the edge cases of ``nsdiffs`` for the seasonal test ``tst``."""
    # A max_D of zero is rejected.
    with pytest.raises(ValueError):
        nsdiffs(austres, m=2, max_D=0, test=tst)

    # A constant series yields zero seasonal differences.
    constant_series = [1] * 4
    assert nsdiffs(constant_series, m=2, test=tst) == 0

    # Seasonal periods of 0 and 1 are both rejected.
    for invalid_period in (0, 1):
        with pytest.raises(ValueError):
            nsdiffs(austres, m=invalid_period, test=tst)
Пример #2
0
def stationarity_tests(data):
    """Estimate differencing orders for a time series with several tests.

    Parameters:
    data: time series for which stationarity tests are performed; its
        ``.values`` attribute is what gets passed to ``ndiffs``/``nsdiffs``.

    Returns:
    A dict with two sub-dicts: ``'usual_differencing'`` (``ndiffs`` results
    keyed by test label) and ``'seasonal_differencing'`` (``nsdiffs`` results
    for ``m=7`` and ``max_D=31`` keyed by test label).
    """
    # Regular differencing: one ndiffs call per unit-root test.
    usual = {}
    for label, test_name in (('ADF_test', 'adf'),
                             ('KPSS_test', 'kpss'),
                             ('PP_test', 'pp')):
        usual[label] = ndiffs(data.values, test=test_name)

    # Seasonal differencing: one nsdiffs call per seasonal test.
    seasonal = {}
    for label, test_name in (('Canova-Hansen', 'ch'),
                             ('OCSB', 'ocsb')):
        seasonal[label] = nsdiffs(data.values, m=7, max_D=31, test=test_name)

    return {'usual_differencing': usual,
            'seasonal_differencing': seasonal}
Пример #3
0
    def auto_pmd(self, train, test):
        """Fit a seasonal auto-ARIMA on ``train`` and forecast ``len(test)`` periods.

        The differencing orders are estimated up front (``ndiffs`` with the
        KPSS test, ``nsdiffs`` with the OCSB test at ``m=52``) and passed to
        ``aa.auto_arima`` as fixed ``d`` and ``D``.

        Returns the forecast converted to a numpy array.
        """
        d_order = ndiffs(train, test='kpss')
        seasonal_d_order = nsdiffs(train, m=52, max_D=12, test='ocsb')

        # Search settings handed to auto_arima as keyword arguments.
        search_settings = dict(
            start_p=0,
            start_q=0,
            max_p=5,
            max_q=5,
            m=52,
            start_P=0,
            seasonal=True,
            d=d_order,
            D=seasonal_d_order,
            suppress_warnings=True,
            stepwise=True,
            error_action='ignore',
            trace=False,
        )
        fitted_model = aa.auto_arima(train, **search_settings)

        horizon = len(test)
        return np.array(fitted_model.predict(n_periods=horizon))
Пример #4
0
    def _check_season_length(self, season_length, target, kwargs):
        """Check if season_length is a working value for seasonality by performing the same test of seasonality pm.auto_arima does.
        The goal is for pm.auto_arima not to fail the training later.

        Args:
            season_length (int): Season length, always > 1.
            target (numpy.array): Target to train on. A copy is passed to
                ``nsdiffs`` so the caller's array is not handed over directly.
            kwargs (dict): Kwargs dictionary of pm.auto_arima. The keys
                ``seasonal_test`` (default ``"ocsb"``), ``max_D`` (default 1)
                and ``seasonal_test_args`` (default empty) are read.

        Raises:
            ValueError: If ``nsdiffs`` raises any exception for this
                configuration. The original exception is chained as
                ``__cause__`` so its traceback is preserved.

        """
        logger.info(f"Check if seasonality 'm' can be set to {season_length}")
        try:
            nsdiffs(
                x=target.copy(),
                m=season_length,
                test=kwargs.get("seasonal_test", "ocsb"),
                max_D=kwargs.get("max_D", 1),
                **kwargs.get("seasonal_test_args", {}),
            )
        except Exception as e:
            # Chain the cause explicitly so the underlying failure stays visible.
            raise ValueError(
                f"Seasonality of AutoARIMA can't be set to {season_length}. Error when testing seasonality with nsdiffs: {e}"
            ) from e
Пример #5
0
def test_issue_351():
    """Regression test: a short, mostly-zero series with m=52 warns once and gives D == 1."""
    series = np.array([
        1, 0, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 2, 1, 6, 2, 1, 0, 2, 0, 1, 0,
        0, 3, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 3, 0, 0, 6, 0, 0, 0, 0, 0, 1,
        3, 0, 0, 1, 0, 0, 0, 0, 0, 1, 1, 0, 0, 1, 1, 0
    ])

    # The call is expected to emit a UserWarning; capture it for inspection.
    with pytest.warns(UserWarning) as captured:
        seasonal_diffs = arima_utils.nsdiffs(
            series, m=52, max_D=2, test='ocsb')

    assert seasonal_diffs == 1

    # Exactly one warning, and it mentions the series being shorter than m.
    messages = pytest_warning_messages(captured)
    assert len(messages) == 1
    assert 'shorter than m' in messages[0]
Пример #6
0
def test_nsdiffs_on_wine():
    """``nsdiffs`` on ``wineind`` with m=52 and default settings must return 2."""
    estimated = nsdiffs(wineind, m=52)
    assert estimated == 2
Пример #7
0
# One dataframe per country holding PMI and its exogenous variables together.
# US: left-join el_us, brent and wti onto pmi_us by index, then keep complete rows only.
df_us = pd.merge(pmi_us, el_us, left_index=True, right_index=True, how='left')
df_us = pd.merge(df_us, brent, left_index=True, right_index=True, how='left')
df_us = pd.merge(df_us, wti, left_index=True, right_index=True, how='left').dropna()



'''
Developing dynamic models (SARIMA with explanatory variables) 
'''
# Estimate the differencing orders for the Norwegian PMI
# (ADF for regular differencing, Canova-Hansen with m=12 for seasonal).
df_no = df_no.dropna()
ndiffs(df_no['pmi'], test='adf')
nsdiffs(df_no['pmi'], m=12, test='ch')

# Direction column for every country: 1 when PMI rose versus the previous row, else 0
# (the first row has no predecessor and therefore gets 0).
df_no['dir'] = [int(step > 0) for step in df_no['pmi'] - df_no['pmi'].shift(1)]
df_dk['dir'] = [int(step > 0) for step in df_dk['pmi'] - df_dk['pmi'].shift(1)]
df_uk['dir'] = [int(step > 0) for step in df_uk['pmi'] - df_uk['pmi'].shift(1)]
df_us['dir'] = [int(step > 0) for step in df_us['pmi'] - df_us['pmi'].shift(1)]


# ARIMA terms are needed for all countries. Exog uses previous periods only (lags).
n_test_obs = 24
# Norway: the last n_test_obs rows form the test set, everything before is training data.
df_no_train = df_no.iloc[:-n_test_obs]
df_no_test = df_no.iloc[-n_test_obs:]
# Exogenous frames are what is left after removing the target, direction and raw price columns.
exog_no_train = df_no_train.drop(
    columns=['dir', 'eur_per_MWh', 'pmi', 'usd_per_MWh', 'usd_per_barrel_x', 'usd_per_barrel_y'])
exog_no_test = df_no_test.drop(
    columns=['dir', 'eur_per_MWh', 'pmi', 'usd_per_MWh', 'usd_per_barrel_x', 'usd_per_barrel_y'])
Пример #8
0
def test_nsdiffs_on_various(data, test, m, expected):
    """``nsdiffs`` with ``max_D=3`` must return ``expected`` for the given data, test and m."""
    estimated = nsdiffs(data, m=m, test=test, max_D=3)
    assert estimated == expected
Пример #9
0
#The easiest way to make your data stationary in the case of ARIMA models is to allow auto_arima to work its magic, estimate the appropriate d value, and difference the time series accordingly. However, other common transformations for enforcing stationarity include (sometimes in combination with one another):
#
#Square root or N-th root transformations
#De-trending your time series
#Differencing your time series one or more times
#Log transformations
#%%%%
from pmdarima.datasets import load_lynx
from pmdarima.arima.utils import nsdiffs

# load the lynx dataset
lynx = load_lynx()

# Canova-Hansen estimate of the number of seasonal differences.
# m=10 here; choosing m commonly requires knowledge of the dataset.
D = nsdiffs(lynx, m=10, max_D=12, test='ch')  # -> 0

# Same estimate with the OCSB test (the default test)
nsdiffs(lynx, m=10, max_D=12, test='ocsb')  # -> 0
 
#%%%The m parameter is the number of observations per seasonal cycle, and is one that must be known a priori. Typically, m will correspond to some recurrent periodicity such as:
#7 - daily, 12 - monthly, 52 - weekly
#Depending on how it’s set, it can dramatically impact the outcome of an ARIMA model. For instance, consider the wineind dataset when fit with m=1 vs. m=12:

import pmdarima as pm
Пример #10
0
def _mean_or_nan(values):
    """Return the arithmetic mean of ``values``, or NaN when ``values`` is empty."""
    return sum(values) / len(values) if values else float("nan")


def _report_previous_forecast_error(df):
    """Print how far the previously pickled forecast was from ``df["memory_used"]``.

    Loads ``forecast.pickle`` (the frame with a ``"prediction"`` column that
    ``predict_arima`` saves after a successful fit), truncates both series to
    their common length and prints the mean under-estimation, the mean
    over-estimation and the mean absolute error.

    Any exception (missing file, unexpected columns, ...) is left to the
    caller, which treats it as "nothing to compare yet".
    """
    # NOTE(security): pickle.load can execute arbitrary code while loading;
    # forecast.pickle must only ever be a file this program wrote itself.
    with open("forecast.pickle", "rb") as forecast_in:
        previous_forecast = pickle.load(forecast_in)

    # Compare only as many rows as both frames have.
    common = min(len(df), len(previous_forecast))
    # Series subtraction aligns on index labels; labels present on only one
    # side give NaN, which both comparisons below reject.
    # NOTE(review): confirm that label alignment (rather than a positional
    # comparison) is what is wanted here.
    error = (df["memory_used"].iloc[:common]
             - previous_forecast["prediction"].iloc[:common])

    underestimation = _mean_or_nan([x for x in error if x >= 0])
    overestimation = _mean_or_nan([x for x in error if x < 0])
    print("UNDERESTIMATION ERROR: " + str(underestimation))
    print("OVERESTIMATION ERROR: " + str(overestimation))
    print("Mean Absolute Error in Last iteration " + str(error.abs().mean()))


def _estimate_differencing(df, seasonal_period):
    """Return ``(nd, nsd)``: regular and seasonal differencing orders for ``df``.

    Both default to 1. ``ndiffs`` (ADF) and ``nsdiffs`` are tried first; if
    either raises, ``nd`` falls back to 1. The Canova-Hansen estimate then
    replaces ``nsd`` whenever it can be computed.
    """
    nd = 1
    nsd = 1
    try:
        nd = ndiffs(df, test='adf')
        logging.info(nd)
        nsd = nsdiffs(df, seasonal_period)
        logging.info(nsd)
    except Exception as exc:
        nd = 1
        print("Exception on tests")
        logging.error(exc)

    ch_test = CHTest(seasonal_period)
    try:
        nsd = ch_test.estimate_seasonal_differencing_term(df)
    except Exception as e:
        # Keep whatever nsd was established above.
        print(e)
        logging.error(e)

    return nd, nsd


def predict_arima(df, seasonal_period=12):
    """Fit a seasonal ARIMA on ``df`` and forecast the ``memory_used`` series.

    Steps, in order:

    1. Report the error of the forecast saved by the previous call
       (best effort; prints "RMSE To be computed" when that is not possible).
    2. Save PACF/ACF plots to ``pacf.png`` / ``acf.png`` (best effort).
    3. Pickle ``df`` to ``arima.pickle``.
    4. Estimate the differencing orders, derive ``p`` and ``q`` from the
       ACF/PACF values that reach ``getThreshold()``, fit the model on the
       whole of ``df`` and forecast as many periods as the last 30 % of
       ``df`` contains.
    5. Pickle the forecast to ``forecast.pickle`` and plot it.

    Parameters:
        df: frame with a ``"memory_used"`` column.
        seasonal_period: number of observations per seasonal cycle, used for
            the seasonal tests and the model's seasonal order (default 12).

    Returns:
        A DataFrame with a single ``"prediction"`` column indexed like the
        last 30 % of ``df``, or ``None`` when fitting, forecasting, saving or
        plotting raised an exception.
    """
    time_in = current_milli_time()

    # Best effort: on the first run there is no previous forecast to compare.
    try:
        _report_previous_forecast_error(df)
    except Exception:
        print("RMSE To be computed")

    try:
        pm.plot_pacf(df, show=False).savefig('pacf.png')
        pm.plot_acf(df, show=False).savefig('acf.png')
    except Exception:
        print("Data points insufficient for ACF & PACF")

    # Persist the data used for this run; the file is overwritten every call.
    with open("arima.pickle", "wb") as arima_data_out:
        pickle.dump(df, arima_data_out)

    # Differencing orders from the stationarity / seasonality tests.
    nd, nsd = _estimate_differencing(df, seasonal_period)

    # Find p and q dynamically: count the lags whose (partial) autocorrelation
    # reaches the threshold, capped at 4 and 1 respectively.
    # Assumes getThreshold() returns the same value on every call.
    # NOTE(review): p is derived from the ACF and q from the PACF; the usual
    # identification rule is the other way round -- confirm this is intended.
    threshold = getThreshold()
    acf_lags = acf(df["memory_used"])
    p = min(sum(1 for x in acf_lags if x >= threshold), 4)
    pacf_lags = pacf(df["memory_used"])
    q = min(sum(1 for x in pacf_lags if x >= threshold), 1)
    d = nd

    # Only the hold-out part is used, for the forecast length and index.
    # NOTE(review): the model below is fitted on all of df, not on the
    # training part -- confirm this is intended.
    _, test = train_test_split(df, shuffle=False, test_size=0.3)

    # If data is seasonal set the values of P,D,Q in seasonal order
    stepwise_model = ARIMA(
        order=(p, d, q),
        seasonal_order=(0, nsd, 0, seasonal_period),
        suppress_warnings=True,
        scoring='mse'
    )
    title = str(p) + " " + str(nd) + " " + str(q)
    print("Model with p=" + str(p) + " d=" + str(d) + " q=" + str(q))

    # NOTE(review): the timing label below says "RNN" although an ARIMA model
    # is timed; it is kept verbatim in case the output is parsed elsewhere.
    try:
        stepwise_model.fit(df)
        # Vary the periods as per the forecasting window:
        #   n_periods= 30 = 5mins
        #   n_periods= 60 = 10mins
        #   n_periods= 90 = 15mins
        future_forecast = stepwise_model.predict(n_periods=len(test))
        future_forecast = pd.DataFrame(
            future_forecast, index=test.index, columns=["prediction"])

        res = pd.concat([df, future_forecast], axis=1)

        # Save the forecast so the next call can report its error.
        with open("forecast.pickle", "wb") as forecast_out:
            pickle.dump(future_forecast, forecast_out)

        trace1 = go.Scatter(x=res.index, y=res["prediction"],
                            name="Prediction", mode='lines')
        trace2 = go.Scatter(x=df.index, y=df["memory_used"],
                            name="DF data", mode='lines')
        layout = go.Layout(title=title)
        fig = go.Figure(data=[trace1, trace2], layout=layout)
        plot(fig, filename="prediction")

        print("Current values")
        print(df)
        print("Predicted Data Points")
        print(future_forecast)
        time_out = current_milli_time()
        print("TIME for RNN(ms):" + str(time_out - time_in))
        return future_forecast
    except Exception as e:
        time_out = current_milli_time()
        print("TIME for RNN(ms):" + str(time_out - time_in))
        print(e)
        return None
Пример #11
0
# Three stacked panels: ACF, PACF and the raw series.
fig, ax = plt.subplots(nrows=3, ncols=1, figsize=(12, 10))
# Each slot is rebound to whatever plot_acf / plot_pacf returns.
ax[0] = plot_acf(x.dropna(), ax=ax[0], lags=50)
ax[1] = plot_pacf(x.dropna(), ax=ax[1], lags=50)
ax[2].plot(x)
ax[2].set_title("Data")
plt.savefig(
    'OUTFILES/M4-Sarima_Autocorr_saison_diff_12_ARA2.png',
    dpi=100,
    bbox_inches='tight',
)
plt.show()


# In[35]:


from pmdarima.arima.utils import nsdiffs

# Number of seasonal differences for ara2 according to the OCSB test (the default test),
# with a 12-observation season and at most 12 differences.
n_Docsb = nsdiffs(
    ara2,
    m=12,
    max_D=12,
    test='ocsb',
)
print("Nombre diff D = ", n_Docsb, "  basé sur param OCSB")


# <font size=4 color="darkblue"><b>2. Identification, estimation et validation de modèles</b></font>
# 

# ### d = 0  &  D = 1

# ### Détermination des "termes" AR et MA 
# Nous savons qu'il reste encore des pics importants dans les autocorrélogrammes ACF et PACF.  
# Il faut donc décider quels termes AR et MA ajouter.  
# >Création d'un algorithme de recherche basé sur les combinaisons possibles des termes AR et MA   
# Valeurs possibles pour chaque terme p,q : $[0, 2]$, car trop long sinon ...  d = 0 / D = 1
# 
# #### ESTIMATION
Пример #12
0
# %%
# Fit modelo_busca (created in an earlier cell, not shown here) on the raw
# 'Preco' values of cresc_p1.
modelo_busca.fit(cresc_p1['Preco'].values)

# %%
# Forecast the next 10 periods with the fitted model.
valores_preditos = modelo_busca.predict(n_periods=10)

# %%
# Plot the forecast and the 'Preco' column of cresc_p2 against cresc_p2['Data'].
# NOTE(review): presumably cresc_p2 has exactly 10 rows so both lines share the
# same x values -- confirm.
plt.plot(cresc_p2['Data'], valores_preditos)
plt.plot(cresc_p2['Data'], cresc_p2['Preco'])

# %%
from pmdarima.arima.utils import nsdiffs

# %%
# Estimate the number of seasonal differences with the Canova-Hansen ('ch')
# test, using m=2 and max_D=12.
# NOTE(review): the auto_arima call that follows passes D=1 as a literal rather
# than this estimate -- confirm which one is intended.
D = nsdiffs(cresc_p1['Preco'].values, m=2, max_D=12, test='ch')

# %%
modelo_busca2 = auto_arima(cresc_p1['Preco'].values,
                           start_p=0,
                           start_q=0,
                           max_p=6,
                           max_q=6,
                           d=1,
                           D=1,
                           start_Q=1,
                           start_P=1,
                           max_Q=4,
                           max_P=4,
                           m=2,
                           seasonal=True,