def arima_2(train, test, hor=24, batch=7, freq=24):
    """Produce rolling ARIMA forecasts for every row of ``test``.

    The ARIMA orders are chosen once, by ``auto.arima`` on the initial
    series (``train`` only). On every later step the series is extended
    with the already-seen rows of ``test``; every ``batch``-th step the
    model is re-estimated with the fixed orders, otherwise the current
    model is re-applied to the extended series without re-estimation.

    Args:
        train: Initial training data, concatenated with ``test[:i]`` and
            passed through ``dp.flatten`` before conversion to an R ``ts``.
        test: Test data; one forecast row is produced per row of ``test``.
        hor: Forecast horizon, handed to R ``forecast`` as ``h``.
        batch: Number of steps between two re-estimations.
        freq: ``frequency`` argument of the R time series.

    Returns:
        A DataFrame with the index and columns of ``test`` holding the
        forecast means for each step.
    """
    pandas2ri.activate()
    r_forecast = importr('forecast')  # R forecast package
    r_ts = ro.r.ts  # R time-series constructor

    # Empty frame shaped like ``test`` that receives the predictions.
    predictions = pd.DataFrame(
        data=None, index=test.index, columns=test.columns
    )

    for step in tqdm(range(len(test))):
        tqdm.write('Step: %i' % step)

        # Everything observed so far: train plus the first ``step`` test rows.
        history = pd.concat([train, test[:step]])
        series = r_ts(dp.flatten(history), frequency=freq)

        if step == 0:
            # Order selection happens only here, on the original train set.
            model = r_forecast.auto_arima(series)
            signature = r_forecast.arimaorder(model)
            order = signature[0:3]  # AR, differencing and MA orders
            if len(signature) >= 6:
                seasonal = signature[3:6]  # seasonal components
            else:
                seasonal = ro.IntVector((0, 0, 0))
        elif step % batch == 0:
            # R objects are invisible to Python's collector, so it is
            # triggered by hand before each (memory-hungry) refit.
            gc.collect()
            model = r_forecast.Arima(
                series, order=order, seasonal=seasonal, method="CSS-ML"
            )
        else:
            # Keep the fitted coefficients, only feed the new observations.
            model = r_forecast.Arima(series, model=model)

        prediction = r_forecast.forecast(model, h=hor)
        predictions.iloc[step, :] = pandas2ri.ri2py(prediction.rx2('mean'))

    return predictions
def ets(train, test, hor=24, batch=7, freq=24):
    """Produce rolling ETS forecasts for every row of ``test``.

    At each step the series consists of ``train`` plus the rows of
    ``test`` seen so far. Every ``batch``-th step (including the first)
    a new ETS model is selected and fitted by R's ``forecast::ets``; on
    the remaining steps the current model is re-applied to the extended
    series.

    Args:
        train: Initial training data, concatenated with ``test[:i]`` and
            passed through ``dp.flatten`` before conversion to an R ``ts``.
        test: Test data; one forecast row is produced per row of ``test``.
        hor: Forecast horizon, handed to R ``forecast`` as ``h``.
        batch: Number of steps between two full model selections.
        freq: ``frequency`` argument of the R time series.

    Returns:
        A DataFrame with the index and columns of ``test`` holding the
        forecast means for each step.
    """
    pandas2ri.activate()
    r_forecast = importr('forecast')  # R forecast package
    r_ts = ro.r.ts  # R time-series constructor

    # Empty frame shaped like ``test`` that receives the predictions.
    predictions = pd.DataFrame(
        data=None, index=test.index, columns=test.columns
    )

    for step in tqdm(range(len(test))):
        # Everything observed so far: train plus the first ``step`` test rows.
        history = pd.concat([train, test[:step]])
        series = r_ts(dp.flatten(history), frequency=freq)

        if step % batch != 0:
            # Between retraining points: reuse the model on the longer series.
            model = r_forecast.ets(series, model=model)
        else:
            # R objects are invisible to Python's collector, so it is
            # triggered by hand before each full model search.
            gc.collect()
            model = r_forecast.ets(series)

        prediction = r_forecast.forecast(model, h=hor)
        predictions.iloc[step] = pandas2ri.ri2py(prediction.rx2('mean'))

    return predictions
def ets_2(train, test, hor=24, freq=24):
    """Rolling ETS forecast that refits a fixed model type at every step.

    The ETS model type and damping flag are selected once by
    ``forecast::ets`` on the initial series (``train`` only). On every
    later step a model of that same type is fitted again on ``train``
    plus the rows of ``test`` seen so far.

    Args:
        train: Initial training data, concatenated with ``test[:i]`` and
            passed through ``dp.flatten`` before conversion to an R ``ts``.
        test: Test data; one forecast row is produced per row of ``test``.
        hor: Forecast horizon, handed to R ``forecast`` as ``h``.
        freq: ``frequency`` argument of the R time series.

    Returns:
        A copy of ``test`` whose values are replaced by the forecast means.
    """
    pandas2ri.activate()
    forecast = importr('forecast')  # R forecast package
    ts = ro.r.ts  # R time-series constructor

    # Same shape/labels as ``test``, pre-filled with NaN.
    # ``np.nan`` is used because the ``np.NaN`` alias no longer exists in
    # NumPy 2.0 and later.
    test_pred = test.copy()
    test_pred[:] = np.nan

    for i in tqdm(range(len(test))):
        # Everything observed so far: train plus the first ``i`` test rows.
        test_ts = ts(dp.flatten(pd.concat([train, test[:i]])), frequency=freq)

        if i == 0:
            # Model selection happens only here, on the original train set.
            model = forecast.ets(test_ts)
            components = model.rx2('components')
            # First three components joined into the model-type string.
            model_type = ro.r.paste(components[0:3], collapse='')
            # Fourth component is the damping flag, stored as the text 'TRUE'.
            damped = bool(components[3] == 'TRUE')
        else:
            # Refit the selected model type on the expanded series.
            model = forecast.ets(test_ts, model=model_type, damped=damped)

        # NOTE(review): the sibling functions assign the converted forecast
        # directly, without the pd.Series wrapper - confirm the wrapper is
        # wanted here (a Series may be label-aligned on assignment).
        test_pred.iloc[i, :] = pd.Series(
            pandas2ri.ri2py(forecast.forecast(model, h=hor).rx2('mean'))
        )

    return test_pred
def ets_4(train, test, hor=24, batch=7, freq=24):
    """Rolling ETS forecast with a fixed model type and periodic refits.

    The ETS model type and damping flag are selected once by
    ``forecast::ets`` on the initial series (``train`` only). Afterwards,
    every ``batch``-th step a model of that type is fitted again on the
    extended series; on the remaining steps the current model is
    re-applied to the extended series.

    Args:
        train: Initial training data, concatenated with ``test[:i]`` and
            passed through ``dp.flatten`` before conversion to an R ``ts``.
        test: Test data; one forecast row is produced per row of ``test``.
        hor: Forecast horizon, handed to R ``forecast`` as ``h``.
        batch: Number of steps between two refits.
        freq: ``frequency`` argument of the R time series.

    Returns:
        A DataFrame with the index and columns of ``test`` holding the
        forecast means for each step.
    """
    pandas2ri.activate()
    r_forecast = importr('forecast')  # R forecast package
    r_ts = ro.r.ts  # R time-series constructor

    # Empty frame shaped like ``test`` that receives the predictions.
    predictions = pd.DataFrame(
        data=None, index=test.index, columns=test.columns
    )

    for step in tqdm(range(len(test))):
        # Everything observed so far: train plus the first ``step`` test rows.
        history = pd.concat([train, test[:step]])
        series = r_ts(dp.flatten(history), frequency=freq)

        if step == 0:
            # Model selection happens only here, on the original train set.
            model = r_forecast.ets(series)
            components = model.rx2('components')
            # First three components joined into the model signature string.
            signature = ro.r.paste(components[0:3], collapse='')
            # Fourth component is the damping flag, stored as the text 'TRUE'.
            damped = bool(components[3] == 'TRUE')
        elif step % batch == 0:
            # R objects are invisible to Python's collector, so it is
            # triggered by hand before each refit.
            gc.collect()
            model = r_forecast.ets(series, model=signature, damped=damped)
        else:
            # Between refits: reuse the model on the longer series.
            model = r_forecast.ets(series, model=model)

        prediction = r_forecast.forecast(model, h=hor)
        predictions.iloc[step] = pandas2ri.ri2py(prediction.rx2('mean'))

    return predictions
def ets_3(train, test, hor=24, batch=7, freq=24):
    """Batch-wise ETS prediction using fitted values on each test batch.

    ``test`` is processed in consecutive batches of ``batch`` rows. For
    each batch an ETS model is fitted on ``train`` plus all preceding
    test rows (the model type and damping flag are selected once, on the
    first batch, and reused afterwards). That model is then applied to
    the batch itself and its fitted values are stored as predictions.

    Args:
        train: Initial training data, concatenated with the preceding test
            rows and passed through ``dp.flatten`` before conversion to an
            R ``ts``.
        test: Test data to predict.
        hor: Unused here; kept so the signature matches the sibling
            forecasting functions.
        batch: Number of test rows per batch.
        freq: ``frequency`` argument of the R time series.

    Returns:
        A copy of ``test`` whose values are replaced by the predictions.
    """
    pandas2ri.activate()
    forecast = importr('forecast')  # R forecast package
    ts = ro.r.ts  # R time-series constructor
    fitted = ro.r('fitted')  # R function returning a model's fitted values

    # Same shape/labels as ``test``, pre-filled with NaN.
    # ``np.nan`` is used because the ``np.NaN`` alias no longer exists in
    # NumPy 2.0 and later.
    test_pred = test.copy()
    test_pred[:] = np.nan

    # Ceiling division without extra imports.
    nbatches = -(-len(test) // batch)

    # Iterate over ALL batches; the last one may be shorter than ``batch``,
    # which is why ``end`` is clamped to ``len(test)`` below.
    for i in tqdm(range(nbatches)):
        start = i * batch
        end = min(start + batch, len(test))

        # Training series: train plus every test row before this batch.
        train_ts = ts(
            dp.flatten(pd.concat([train, test[:start]])), frequency=freq
        )
        # NOTE(review): unlike ``train_ts`` the batch is handed to R without
        # ``dp.flatten``/``ts`` conversion - confirm this is intended.
        test_ts = test[start:end]

        if i == 0:
            # Model selection happens only here, on the original train set.
            model = forecast.ets(train_ts)
            components = model.rx2('components')
            # First three components joined into the model signature string.
            sign = ro.r.paste(components[0:3], collapse='')
            # Fourth component is the damping flag, stored as the text 'TRUE'.
            damped = bool(components[3] == 'TRUE')
        else:
            # Refit the selected model type on the expanded series.
            model = forecast.ets(train_ts, model=sign, damped=damped)

        # Apply the trained model to the batch and take its fitted values.
        # ``np.asarray`` gives plain positional indexing whatever container
        # the rpy2 conversion returns.
        pred_new = np.asarray(
            pandas2ri.ri2py(fitted(forecast.ets(test_ts, model=model)))
        )

        for j in range(len(pred_new)):
            tqdm.write(str(start + j))  # temp check; tqdm.write needs a str
            test_pred.iloc[start + j] = pred_new[j]

    return test_pred