import pandas as pd

# dp is the project's data-preparation helper module (load/save/split/resample
# etc.); arima() and ets_v() are the single-series forecasters defined
# elsewhere in this repo.

def arima_hw(train, test, batch=7, freq=24):
    test_pred = pd.DataFrame(data=None, index=test.index, columns=test.columns)  # template dataframe for out-of-sample predictions on the test set
    train_days = dp.split(train, nsplits=7)  # split train set by weekday
    test_days = dp.split(test, nsplits=7)  # split test set by weekday
    for i in train_days:  # for each weekday
        test_day_pred = arima(train_days[i], test_days[i], hor=24, batch=batch, freq=freq)  # predict all hours of the respective weekday
        test_pred.iloc[i::7, :] = test_day_pred  # fill corresponding rows with out-of-sample predictions
    return test_pred
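# Design note: arima_hw fits one horizontal model per weekday, so the weekly
# seasonality is absorbed by the split and each sub-series only has to capture
# the remaining daily structure. The iloc[i::7, :] write-back assumes dp.split
# returns a dict keyed 0..6 holding every 7th row of the frame, along the
# lines of this illustrative sketch (not the repo's actual implementation):
def split_sketch(data, nsplits=7):
    return {i: data.iloc[i::nsplits] for i in range(nsplits)}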
from statsmodels.tsa.statespace.sarimax import SARIMAX

def sarimax(train, test):
    train_pred = pd.DataFrame(data=None, index=train.index, columns=train.columns)  # in-sample predictions on the train set
    test_pred = pd.DataFrame(data=None, index=test.index, columns=test.columns)  # out-of-sample predictions on the test set
    train_days = dp.split(train, nsplits=7)  # split train set by weekday
    test_days = dp.split(test, nsplits=7)  # split test set by weekday
    for i in train_days:  # for each weekday
        train_day, test_day = train_days[i], test_days[i]
        train_pred_day = pd.DataFrame(data=None, index=train_day.index, columns=train_day.columns)  # in-sample predictions for this weekday
        test_pred_day = pd.DataFrame(data=None, index=test_day.index, columns=test_day.columns)  # out-of-sample predictions for this weekday
        for hour in train_day:  # for each hour of the day
            train_day_hour = train_day[hour]  # train samples for this particular hour
            test_day_hour = test_day[hour]  # test samples for this particular hour
            model_train = SARIMAX(train_day_hour, order=(0, 1, 1), seasonal_order=(0, 1, 1, 7), trend='c', measurement_error=True).fit()  # estimate parameters on the train set only
            model_test = SARIMAX(pd.concat([train_day_hour, test_day_hour]), order=(0, 1, 1), seasonal_order=(0, 1, 1, 7), trend='c', measurement_error=True).filter(model_train.params)  # apply the fitted parameters to the full series: workaround for a rolling day-ahead forecast without re-estimation
            train_pred_day[hour] = model_test.predict(start=0, end=len(train_day) - 1)  # predict in-sample on the train set
            test_pred_day[hour] = model_test.predict(start=len(train_day))  # predict out-of-sample on the test set
        train_pred.iloc[i::7, :] = train_pred_day  # fill corresponding rows with in-sample predictions
        test_pred.iloc[i::7, :] = test_pred_day  # fill corresponding rows with out-of-sample predictions
    return train_pred, test_pred
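# Minimal, self-contained sketch of the .filter() workaround used above: fit
# the parameters on the training portion only, then apply them unchanged to
# the extended series, so predict() over the test range yields one-step-ahead
# forecasts that condition on the observed history without re-estimating the
# model each day. The synthetic series and variable names are illustrative.
import numpy as np
import pandas as pd
from statsmodels.tsa.statespace.sarimax import SARIMAX

rng = np.random.default_rng(0)
series = pd.Series(rng.normal(size=120)).cumsum()  # synthetic random walk
train_part, test_part = series[:100], series[100:]

fitted = SARIMAX(train_part, order=(0, 1, 1), trend='c').fit(disp=False)  # estimate on train only
extended = SARIMAX(pd.concat([train_part, test_part]), order=(0, 1, 1), trend='c').filter(fitted.params)  # reuse the fitted parameters
print(extended.predict(start=len(train_part)).head())  # one-step-ahead predictions over the test portion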
from sklearn.metrics import r2_score

# moving average over consecutive days
for w in range(1, 50):  # optimise window size
    pred = targets.rolling(window=w).mean().shift(1)  # shift to exclude the current day from its own forecast
    load = pd.concat({'pred': pred, 'targets': targets}, axis=1)  # join targets and predictions into one dataset
    load.dropna(inplace=True)  # drop the initial NaN rows produced by the moving average
    print(r2_score(y_pred=load['pred'], y_true=load['targets'], multioutput='uniform_average'))

# moving average for separated days
for w in range(1, 50):  # optimise window size
    pred = pd.DataFrame(data=None, index=targets.index, columns=targets.columns)  # initialise predictions to NaNs
    for (i, day) in dp.split(data=targets, nsplits=2).items():  # for each day group
        pred.iloc[i::2, :] = day.rolling(window=w).mean().shift(1)  # assign predictions to the corresponding rows, shift to exclude the current day
    load = pd.concat({'pred': pred, 'targets': targets}, axis=1)  # join targets and predictions into one dataset
    load.dropna(inplace=True)  # drop the initial NaN rows produced by the moving average
    print(r2_score(y_pred=load['pred'], y_true=load['targets'], multioutput='uniform_average'))
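# Tiny worked example of the baseline above: with shift(1), the prediction
# for day t is the mean of the w days strictly before it, so the current day
# never leaks into its own forecast.
import pandas as pd

s = pd.Series([1.0, 2.0, 3.0, 4.0, 5.0])
print(s.rolling(window=2).mean().shift(1))  # NaN, NaN, 1.5, 2.5, 3.5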
dp.save(data=data, path=data_dir + 'data_imp.csv', idx='datetime')  # save imputed data

# AGGREGATE DATA & CREATE TRAIN & TEST SETS
exp_dir = 'C:/Users/SABA/Google Drive/mtsg/data/train_test/'  # directory for the results
data = dp.load(path=data_dir + 'data_imp.csv', idx='datetime', cols='load', dates=True)  # load imputed data
data = dp.resample(data, freq=1440)  # aggregate the raw minute data (1440 observations per day) to the working resolution
train, test = dp.train_test(data=data, test_size=0.255, base=7)  # split into train & test sets, aligned to whole weeks
dp.save(data=train, path=exp_dir + 'train.csv', idx='date')  # save train set
dp.save(data=test, path=exp_dir + 'test.csv', idx='date')  # save test set
dp.save_dict(dic=dp.split(train, nsplits=7), path=exp_dir + 'train_', idx='date')  # split train set by weekday and save each part into a separate file
dp.save_dict(dic=dp.split(test, nsplits=7), path=exp_dir + 'test_', idx='date')  # split test set by weekday and save each part into a separate file

# WEATHER DATA
data_dir = 'C:/Users/SABA/Google Drive/mtsg/data/'  # directory containing the data
# download weather in parts due to the API limit of 500 requests per day
dates = pd.DatetimeIndex(data.index).strftime('%Y%m%d')[:400]  # first part of dates
dp.dl_save_w(dates, data_dir + 'weather_1.csv')  # download & save first part
dates = pd.DatetimeIndex(data.index).strftime('%Y%m%d')[400:800]  # second part of dates
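# Illustrative sketch of what a base-aligned split like dp.train_test is
# assumed to do (the name and rounding rule here are assumptions, not the
# repo's actual implementation): place the split point at a multiple of
# base=7 rows, so the weekday pattern that iloc[i::7] relies on stays intact
# across both sets.
def train_test_sketch(data, test_size=0.255, base=7):
    split = round(len(data) * (1 - test_size) / base) * base  # week-aligned split row
    return data.iloc[:split], data.iloc[split:]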
# vertical week
test_pred = arima_vw(train, test, batch=7, freq=52)
r2_score(y_true=test, y_pred=test_pred, multioutput='uniform_average')
dp.save(data=test_pred, path='C:/Users/SABA/Google Drive/mtsg/data/arima_vw.csv')

# horizontal
test_pred = arima(train, test, hor=24, batch=7, freq=24)
r2_score(y_true=test, y_pred=test_pred, multioutput='uniform_average')
dp.save(data=test_pred, path='C:/Users/SABA/Google Drive/mtsg/data/arima_h.csv')

# horizontal week
test_pred = arima_hw(train, test, batch=7, freq=52)
r2_score(y_true=test, y_pred=test_pred, multioutput='uniform_average')
dp.save(data=test_pred, path='C:/Users/SABA/Google Drive/mtsg/data/arima_hw.csv')
# a per-hour breakdown of these scores is sketched after this block

train = dp.split(train, nsplits=7)[1]  # keep only the weekday with index 1
test = dp.split(test, nsplits=7)[1]
train.to_csv(path_or_buf='C:/Users/SABA/Google Drive/mtsg/code/load_forecast/data/load_train.csv', header=True, sep=',', decimal='.')  # export train set for this weekday
test.to_csv(path_or_buf='C:/Users/SABA/Google Drive/mtsg/code/load_forecast/data/load_test.csv', header=True, sep=',', decimal='.')  # export test set for this weekday
targets.to_csv(
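# Optional diagnostic for the r2_score calls above (to be run right after any
# of the arima_* evaluations, before train/test are re-split): sklearn's
# multioutput='raw_values' returns one R^2 per column, i.e. per hour of the
# day, showing where the day-ahead forecasts are weakest; uniform_average is
# just the mean of these values.
hourly_r2 = r2_score(y_true=test, y_pred=test_pred, multioutput='raw_values')
print(pd.Series(hourly_r2, index=test.columns).round(3))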
def ets_vw(train, test, batch=7, freq=52):
    test_pred = pd.DataFrame(data=None, index=test.index, columns=test.columns)  # template dataframe for out-of-sample predictions
    train_days = dp.split(train, nsplits=7)  # split train set by weekday
    test_days = dp.split(test, nsplits=7)  # split test set by weekday
    for i in train_days:  # for each weekday
        test_day_pred = ets_v(train_days[i], test_days[i], hor=1, batch=batch, freq=freq)  # predict all hours of the respective weekday
        test_pred.iloc[i::7] = test_day_pred  # fill corresponding rows with out-of-sample predictions
    return test_pred
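# Hypothetical usage, mirroring the arima_* evaluation pattern earlier in the
# script (the output path is an assumption following the arima_* file naming):
test_pred = ets_vw(train, test, batch=7, freq=52)
r2_score(y_true=test, y_pred=test_pred, multioutput='uniform_average')
dp.save(data=test_pred, path='C:/Users/SABA/Google Drive/mtsg/data/ets_vw.csv')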