Пример #1
0
def ARIMA_forecasting():
    """Run a rolling-window ARIMA(1,1,0) evaluation for every subject.

    Reads the module-level settings ``normalize_data``, ``PH``, ``dataset``,
    ``data_directory`` and (for the 'oaps' dataset) ``seg`` to locate the
    pickled train/test data. For each subject the train and test frames are
    concatenated and passed to ``process_data``; an ARIMA model is then
    refit on each sliding window of the preceding 100 rows of ``X`` and the
    last value of a 6-step forecast is kept as the prediction. Only the
    first 20% of the rows are evaluated.

    Returns:
        pandas.DataFrame with columns ``Subject`` and ``RMSE``, sorted by
        subject. Subjects whose score could not be computed are omitted.

    Raises:
        ValueError: if ``dataset`` is neither 'oaps' nor 'ohio'.
    """
    window = 100         # rows of X fed to each ARIMA fit
    forecast_steps = 6   # steps forecast ahead; only the last one is scored
    eval_fraction = 0.2  # fraction of the rows that is walked over

    substring = ('normalized_' if normalize_data else '') + PH + 'min'

    # NOTE(review): unpickle_data presumably wraps pickle.load -- only point
    # it at trusted files; confirm against its definition.
    if dataset == 'oaps':
        print('Getting data from ', data_directory + seg + '\n')
        # e.g. windowed_train_normalized_60min.pickle
        unpickled_train_data = unpickle_data(data_directory + seg + 'windowed_train_' + substring + '.pickle')
        unpickled_test_data = unpickle_data(data_directory + seg + 'windowed_test_' + substring + '.pickle')
    elif dataset == 'ohio':
        # e.g. windowed_normalized_60min.pickle
        unpickled_train_data = unpickle_data(data_directory + 'OhioT1DM-training/imputed/' + 'windowed_' + substring + '.pickle')
        unpickled_test_data = unpickle_data(data_directory + 'OhioT1DM-testing/imputed/' + 'windowed_' + substring + '.pickle')
    else:
        # Previously this fell through and failed later with a NameError.
        raise ValueError("Unsupported dataset %r; expected 'oaps' or 'ohio'" % (dataset,))

    # Processing order is randomised; the result is re-sorted before return.
    subjs = list(unpickled_train_data.keys())
    random.shuffle(subjs)

    testScores = []
    subjects = []

    for i, subj in enumerate(subjs, start=1):
        print('----------Training on subject: ',subj,'----------')
        print('----------Subject: ',i,'/',len(subjs),'----------')
        df = pd.concat([unpickled_train_data[subj], unpickled_test_data[subj]], axis=0)

        X, y = process_data(df)
        n = int(eval_fraction * len(X))
        forecasts = []

        # Alternatives previously tried at this point (removed commented-out
        # code): statsmodels SARIMAX and pmdarima auto_arima.
        for j in range(window, n):
            # Flatten the preceding `window` rows into one 1-D series.
            data = np.hstack(X[j - window:j])
            model = ARIMA(data, order=(1, 1, 0))

            try:
                model_fit = model.fit(disp=0)
                output = model_fit.forecast(steps=forecast_steps)[0]
                yhat = output[-1]
            except Exception as exc:
                # Fit/forecast failed: fall back to a persistence forecast
                # from the last observation *inside the current window*.
                # (The old fallback, X[-1], leaked the final row of the whole
                # dataset into every failed step.)
                print('ARIMA fit failed at row', j, '- using last observation:', exc)
                yhat = data[-1]
            print('----------Row: ',j,'/',n,'------Subject: ',i,'/',len(subjs),'----------')

            forecasts.append(yhat)

        try:
            # NOTE(review): int() truncates toward zero; if the data is
            # normalised to a small range this discards most of the signal --
            # confirm this rounding is intended.
            forecasts = [int(x) for x in forecasts]
            error = math.sqrt(mean_squared_error(y[window:n], forecasts))
        except Exception as exc:
            # Best-effort: skip subjects that cannot be scored (e.g. too few
            # rows to produce any forecast), but say so instead of hiding it.
            print('Skipping subject', subj, '- could not compute RMSE:', exc)
            continue

        print('Test RMSE: %.3f' % error)
        testScores.append(error)
        subjects.append(subj)

    results_df = pd.DataFrame(list(zip(subjects, testScores)), columns=['Subject', 'RMSE'])
    results_df.sort_values(by=['Subject'], inplace=True)
    return results_df
Пример #2
0
#ARIMA
#-----------------------------
#%
#Autoregressive Integrated Moving Average (ARIMA)
#The Autoregressive Integrated Moving Average (ARIMA) method models the next step in the sequence as a linear function of the differenced observations and residual errors at prior time steps.
#
#It combines both Autoregression (AR) and Moving Average (MA) models as well as a differencing pre-processing step of the sequence to make the sequence stationary, called integration (I).
#
#The notation for the model involves specifying the order for the AR(p), I(d), and MA(q) models as parameters to an ARIMA function, e.g. ARIMA(p, d, q). An ARIMA model can also be used to develop AR, MA, and ARMA models.
#
#The method is suitable for univariate time series that have a trend but no seasonal components.

# ARIMA example
from statsmodels.tsa.arima_model import ARIMA
from random import random

# Synthetic series: each integer 1..99 plus a random offset in [0, 1),
# i.e. a noisy upward trend.
data = list(map(lambda base: base + random(), range(1, 100)))
data
# Build an ARIMA model with order (p=1, d=1, q=1) and fit it with
# convergence output turned off.
model = ARIMA(data, order=(1, 1, 1))
model_fit = model.fit(disp=False)
# Predict a single point at index len(data), the first step past the sample;
# typ='levels' is passed so the result is on the scale of the input series.
yhat = model_fit.predict(start=len(data), end=len(data), typ='levels')
print(yhat)
# Plot the values in lag_pacf (defined outside this excerpt) and titled as the
# partial autocorrelation function.
plt.plot(lag_pacf)
# Dashed gray reference line at zero.
plt.axhline(y=0,linestyle="--",color='gray')
# Dashed gray lines at -/+ 1.96/sqrt(N), where N = len(timeseries);
# presumably the approximate 95% significance bounds -- confirm.
plt.axhline(y=-1.96/np.sqrt(len(timeseries)),linestyle="--",color='gray')
plt.axhline(y=1.96/np.sqrt(len(timeseries)),linestyle="--",color='gray')
plt.title('Partial Autocorrelation Function')
plt.tight_layout()


# In[173]:


from statsmodels.tsa.arima_model import ARIMA
# Fit an ARIMA model to indexedtslog and overlay its fitted values on
# indexedtslogdiffshift (both defined outside this excerpt).
timeseries=indexedtslog

# ARIMA model with order (1,1,1) -- not a pure AR model, since d=1 and q=1.
# freq='W-Fri' is passed as the frequency of the series.
model =ARIMA(timeseries,order = (1,1,1), freq= 'W-Fri')
# disp=-1: presumably silences solver output -- confirm against the
# statsmodels version in use.
ARIMAresult= model.fit(disp=-1)
plt.plot(indexedtslogdiffshift)
plt.plot(ARIMAresult.fittedvalues, color='red')
# The title reports the residual sum of squares between the fitted values and
# the seriescol column of indexedtslogdiffshift.
plt.title ('RSS: %.4f'%sum((ARIMAresult.fittedvalues-indexedtslogdiffshift[seriescol])**2))
print ('plotting ARIMA Model')


# In[174]:


# Copy the fitted values of the ARIMA result into a standalone Series
# (copy=True, so it does not share data with ARIMAresult.fittedvalues).
pred_ARIMA_diff = pd.Series(ARIMAresult.fittedvalues, copy =True)
# Bare expression: the returned first rows are not stored, so this only has a
# visible effect when an interactive session echoes it.
pred_ARIMA_diff.head()