Example #1
def arima_hw(train, test, batch=7, freq=24):
    test_pred = pd.DataFrame(
        data=None, index=test.index, columns=test.columns
    )  # prepare dataframe template for out of sample prediction on test set
    for (i, train_day, test_day) in [
        (i, dp.split(train, nsplits=7)[i], dp.split(test, nsplits=7)[i])
            for i in dp.split(train, nsplits=7)
    ]:  # for each day
        test_day_pred = arima(train_day, test_day, hor=24, batch=batch,
                              freq=freq)  # predict for all hours of the respective day
        test_pred.iloc[i::7, :] = test_day_pred  # fill corresponding rows with out of sample predictions
    return test_pred
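A minimal usage sketch for arima_hw, mirroring the calls shown in Example #5 below; it assumes train and test are weekday-aligned DataFrames with 24 hourly columns and that r2_score from scikit-learn is available:

from sklearn.metrics import r2_score

test_pred = arima_hw(train, test, batch=7, freq=52)  # day-ahead forecasts for the whole test set
print(r2_score(y_true=test, y_pred=test_pred, multioutput='uniform_average'))  # average R^2 over the 24 hourly columns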
Example #2
def sarimax(train, test):
    train_pred = pd.DataFrame(data=None, index=train.index, columns=train.columns)  # in sample predictions on train set
    test_pred = pd.DataFrame(data=None, index=test.index, columns=test.columns)  # out of sample predictions on test set
    for (i, train_day, test_day) in [
            (i, dp.split(train, nsplits=7)[i], dp.split(test, nsplits=7)[i])
            for i in dp.split(train, nsplits=7)
    ]:  # for each day
        train_pred_day = pd.DataFrame(data=None, index=train_day.index, columns=train_day.columns)  # in sample predictions on train set
        test_pred_day = pd.DataFrame(data=None, index=test_day.index, columns=test_day.columns)  # out of sample predictions on test set
        for hour in train_day:  # for each hour in a day
            train_day_hour = train_day[hour]  # train samples for particular hour
            test_day_hour = test_day[hour]  # test samples for particular hour
            model_train = SARIMAX(train_day_hour, order=(0, 1, 1), seasonal_order=(0, 1, 1, 7),
                                  trend='c', measurement_error=True).fit()  # train model
            model_test = SARIMAX(pd.concat([train_day_hour, test_day_hour]), order=(0, 1, 1),
                                 seasonal_order=(0, 1, 1, 7), trend='c',
                                 measurement_error=True).filter(model_train.params)  # workaround for rolling day ahead forecast
            train_pred_day[hour] = model_test.predict(start=0, end=len(train_day) - 1)  # predict in sample on train set
            test_pred_day[hour] = model_test.predict(start=len(train_day))  # predict out of sample on test set
        train_pred.iloc[i::7, :] = train_pred_day  # fill corresponding rows with in sample predictions
        test_pred.iloc[i::7, :] = test_pred_day  # fill corresponding rows with out of sample predictions
    return train_pred, test_pred
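The .filter(model_train.params) call is the core trick here: the model is re-instantiated on the concatenated train+test series but only filtered with the parameters already fitted on the train part, so the test-period predictions are one-step-ahead (day-ahead per weekday) forecasts without refitting. A self-contained sketch of the same pattern on a toy series (synthetic data, hypothetical names):

import numpy as np
import pandas as pd
from statsmodels.tsa.statespace.sarimax import SARIMAX

y = pd.Series(np.random.randn(120).cumsum())  # toy series
y_train, y_test = y[:100], y[100:]  # simple split
fitted = SARIMAX(y_train, order=(0, 1, 1)).fit(disp=False)  # estimate parameters on the train part only
extended = SARIMAX(y, order=(0, 1, 1)).filter(fitted.params)  # apply the fixed parameters to the full series
test_fcst = extended.predict(start=len(y_train), end=len(y) - 1)  # rolling one-step-ahead forecasts for the test part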
Example #3
File: ma.py Project: lulzzz/mtsg
for i in range(1, 50):  # optimise window size for a moving average over all days
    pred = targets.rolling(window=i).mean().shift(1)
    load = pd.concat({'pred': pred, 'targets': targets}, axis=1)
    load.dropna(inplace=True)
    print(
        r2_score(y_pred=load['pred'],
                 y_true=load['targets'],
                 multioutput='uniform_average'))

# moving average for separated days

for w in range(1, 50):  # optimise window size
    pred = pd.DataFrame(
        data=None, index=targets.index,
        columns=targets.columns)  # initialize predictions to Nans
    for (i, day) in dp.split(data=targets, nsplits=2).items():  # for each day
        pred.iloc[i::2, :] = day.rolling(window=w).mean().shift(1)  # assign predictions to corresponding rows, shift to exclude current day
    load = pd.concat({'pred': pred, 'targets': targets}, axis=1)  # join targets and predictions into one dataset
    load.dropna(inplace=True)  # drop a couple of rows with NaNs at the beginning produced by moving average
    print(
        r2_score(y_pred=load['pred'],
                 y_true=load['targets'],
                 multioutput='uniform_average'))
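The loop above only prints the R² for each window; a small variation (a sketch using the same dp.split helper and a hypothetical scores dict) would collect the scores and keep the best window:

scores = {}  # window size -> R^2
for w in range(1, 50):
    pred = pd.DataFrame(data=None, index=targets.index, columns=targets.columns)
    for (i, day) in dp.split(data=targets, nsplits=2).items():
        pred.iloc[i::2, :] = day.rolling(window=w).mean().shift(1)
    load = pd.concat({'pred': pred, 'targets': targets}, axis=1).dropna()
    scores[w] = r2_score(y_pred=load['pred'], y_true=load['targets'], multioutput='uniform_average')
best_w = max(scores, key=scores.get)  # window with the highest R^2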
Example #4
        idx='datetime')  # save imputed data

# AGGREGATE DATA & CREATE TRAIN & TEST SETS
exp_dir = 'C:/Users/SABA/Google Drive/mtsg/data/train_test/'  # directory for the results
data = dp.load(path=data_dir + 'data_imp.csv',
               idx='datetime',
               cols='load',
               dates=True)  # load imputed data

data = dp.resample(data, freq=1440)  # aggregate minutes to half-hours
train, test = dp.train_test(data=data, test_size=0.255,
                            base=7)  # split into train & test sets
dp.save(data=train, path=exp_dir + 'train.csv', idx='date')  # save train set
dp.save(data=test, path=exp_dir + 'test.csv', idx='date')  # save test set
dp.save_dict(
    dic=dp.split(train, nsplits=7), path=exp_dir + 'train_', idx='date'
)  # split train set according to weekdays and save each into a separate file
dp.save_dict(
    dic=dp.split(test, nsplits=7), path=exp_dir + 'test_', idx='date'
)  # split test set according to weekdays and save each into a separate file
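For reference, a plain-pandas sketch of roughly what the dp.save_dict calls presumably do here (assumed behaviour; dp.save_dict itself is not shown in the snippet):

for i, day_frame in dp.split(train, nsplits=7).items():  # one frame per weekday
    day_frame.to_csv(exp_dir + 'train_' + str(i) + '.csv', index_label='date')  # e.g. train_0.csv ... train_6.csv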

# WEATHER DATA

data_dir = 'C:/Users/SABA/Google Drive/mtsg/data/'  # directory containing data

# downloading weather in parts due to the limit on API requests (only 500 per day)
dates = pd.DatetimeIndex(
    data.index).strftime('%Y%m%d')[:400]  # first part of dates
dp.dl_save_w(dates, data_dir + 'weather_1.csv')  # save first part
dates = pd.DatetimeIndex(
    data.index).strftime('%Y%m%d')[400:800]  # second part of dates
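The same 400-date slicing written as a loop over chunks (a sketch reusing dp.dl_save_w from above; because of the 500-requests-per-day limit, each chunk would still have to be fetched on a different day):

all_dates = pd.DatetimeIndex(data.index).strftime('%Y%m%d')  # all dates as YYYYMMDD strings
for part, start in enumerate(range(0, len(all_dates), 400), start=1):  # 400-date chunks
    dp.dl_save_w(all_dates[start:start + 400], data_dir + 'weather_' + str(part) + '.csv')  # weather_1.csv, weather_2.csv, ...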
Example #5
# vertical week
test_pred = arima_vw(train, test, batch=7, freq=52)
r2_score(y_true=test, y_pred=test_pred, multioutput='uniform_average')
dp.save(data=test_pred,
        path='C:/Users/SABA/Google Drive/mtsg/data/arima_vw.csv')
# horizontal
test_pred = arima(train, test, hor=24, batch=7, freq=24)
r2_score(y_true=test, y_pred=test_pred, multioutput='uniform_average')
dp.save(data=test_pred,
        path='C:/Users/SABA/Google Drive/mtsg/data/arima_h.csv')
# horizontal week
test_pred = arima_hw(train, test, batch=7, freq=52)
r2_score(y_true=test, y_pred=test_pred, multioutput='uniform_average')
dp.save(data=test_pred,
        path='C:/Users/SABA/Google Drive/mtsg/data/arima_hw.csv')
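A small sketch (hypothetical results dict) that runs the three variants from above and keeps their test-set R² scores together for comparison:

results = {}  # variant name -> R^2 on the test set
for name, pred in [('vertical week', arima_vw(train, test, batch=7, freq=52)),
                   ('horizontal', arima(train, test, hor=24, batch=7, freq=24)),
                   ('horizontal week', arima_hw(train, test, batch=7, freq=52))]:
    results[name] = r2_score(y_true=test, y_pred=pred, multioutput='uniform_average')
print(results)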

train = dp.split(train, nsplits=7)[1]
test = dp.split(test, nsplits=7)[1]

train.to_csv(
    path_or_buf=
    'C:/Users/SABA/Google Drive/mtsg/code/load_forecast/data/load_train.csv',
    header=True,
    sep=',',
    decimal='.')
test.to_csv(
    path_or_buf=
    'C:/Users/SABA/Google Drive/mtsg/code/load_forecast/data/load_test.csv',
    header=True,
    sep=',',
    decimal='.')
targets.to_csv(
Example #6
File: ets.py Project: lulzzz/mtsg
def ets_vw(train, test, batch=7, freq=52):
    test_pred = pd.DataFrame(data=None, index=test.index, columns=test.columns)  # template structure for dataframe for predictions
    for (i, train_day, test_day) in [
            (i, dp.split(train, nsplits=7)[i], dp.split(test, nsplits=7)[i])
            for i in dp.split(train, nsplits=7)
    ]:  # for each day
        test_day_pred = ets_v(train_day, test_day, hor=1, batch=batch, freq=freq)  # predict for all hours of the respective day
        test_pred.iloc[i::7] = test_day_pred  # fill corresponding rows with out of sample predictions
    return test_pred
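A usage sketch for ets_vw, following the same call-and-score pattern as the arima variants in Example #5 (weekly seasonality, hence freq=52; r2_score from scikit-learn is assumed to be imported):

test_pred = ets_vw(train, test, batch=7, freq=52)  # day-ahead ETS forecasts per weekday
r2_score(y_true=test, y_pred=test_pred, multioutput='uniform_average')  # average R^2 over the hourly columns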