import os
import time

import joblib
import numpy as np
import pandas as pd
from pandas import concat, read_csv
from sklearn.preprocessing import MinMaxScaler
from keras.models import Sequential, load_model
from keras.layers import Dense, LSTM

from Data_interaction import DataInteraction


class Prediction():
    def __init__(self):
        self.DI = DataInteraction()

    def get_res(self):
        self.train_model()
        inputs = self._getTrainData()
        res = self.predict(inputs)
        # print('predicted 15min outdoor indicators:{}\n'.format(res[0, :-1]))
        return res[0, :-1]

    def train_model(self):
        time_str = time.strftime('%Y-%m-%d-%H-%M', time.localtime(time.time()))
        day_str = time_str.split('-')[2]
        hour_str = time_str.split('-')[3]
        min_str = time_str.split('-')[4]
        # retrain the prediction model on days 2, 9, 16, 23 and 26,
        # between 23:00 and 23:40
        if (int(day_str) in [2, 9, 16, 23, 26]) and (int(hour_str) == 23) and (int(min_str) <= 40):
            self._train_prediction()

    # @dev resample the raw data into 15-min intervals (12 steps back in time)
    def _getTrainData(self):
        raw = self.DI.getDf(1500)
        data = raw.iloc[:, [0, 1, 3, 4, 6, 8, 9, 24]]
        data.columns = ['timestamp', 'date', 'To', 'Ho', 'So', 'Ti', 'Hi', 'CTL_1']
        data.loc[:, 'timestamp'] = data.loc[:, 'date'].apply(lambda x: x.timestamp())
        inputs = data.loc[:, ['timestamp', 'To', 'Ho', 'So']].values.tolist()
        timestamp_last = inputs[0][0]
        output = []
        n_row = 0
        for i in range(12):
            # walk back through the rows until one is at least i * 15 min (900 s) older
            while True:
                if inputs[n_row][0] <= timestamp_last - 900 * i:
                    # the original appended inputs[i][1:], which ignores the
                    # row search above; n_row is the row that was looked up
                    output.append(inputs[n_row][1:])
                    break
                n_row += 1
        output = np.array(output)
        return output

    def _series_to_supervised(self, data, n_in=1, n_out=1, dropnan=True):
        n_vars = 1 if type(data) is list else data.shape[1]
        df = pd.DataFrame(data)
        cols, names = list(), list()
        # input sequence (t-n, ... t-1)
        for i in range(n_in, 0, -1):
            cols.append(df.shift(i))
            names += [('var%d(t-%d)' % (j + 1, i)) for j in range(n_vars)]
        # forecast sequence (t, t+1, ... t+n)
        for i in range(0, n_out):
            cols.append(df.shift(-i))
            if i == 0:
                names += [('var%d(t)' % (j + 1)) for j in range(n_vars)]
            else:
                names += [('var%d(t+%d)' % (j + 1, i)) for j in range(n_vars)]
        # put it all together
        agg = concat(cols, axis=1)
        agg.columns = names
        # drop rows with NaN values
        if dropnan:
            agg.dropna(inplace=True)
        return agg

    # **
    #  * @dev train the LSTM prediction model on the most recent 15-min dataset
    #  * @return None; the fitted model is saved under ../../data/lstm_model/
    #  *
    def _train_prediction(self):
        time_str = time.strftime('%Y-%m-%d', time.localtime(time.time()))
        year_str = time_str.split('-')[0][-2:]
        month_str = time_str.split('-')[1]
        filename_data_ = ""
        dataset = pd.DataFrame()
        # search backwards for the most recent daily csv file
        for month in range(int(month_str), 0, -1):
            for day in range(31, 0, -1):
                month = '0' + str(month) if len(str(month)) == 1 else str(month)
                day = '0' + str(day) if len(str(day)) == 1 else str(day)
                filename_data = '15min_' + year_str + month + day + '.csv'
                try:
                    dataset = read_csv('../../data/env/' + filename_data, index_col=0)
                    filename_data_ = filename_data
                    break
                except Exception:  # no file for that date
                    pass
            if len(dataset) > 0:
                break
        dataset = dataset[['To', 'Ho', 'So']].iloc[0:10000, :]
        values = dataset.values
        values = list(values)
        values.reverse()  # the csv is newest-first; restore chronological order
        values = np.array(values).astype('float32')
        scaler = MinMaxScaler(feature_range=(0, 1))
        scaler_filename = '../../data/scaler/scaler.save'
        scaled = scaler.fit_transform(values)
        joblib.dump(scaler, scaler_filename)
        reframed = self._series_to_supervised(scaled, n_in=12, n_out=1)
        # split into train and test sets
        values = reframed.values[:, :]
        n_train = int(0.9 * len(values))
        train = values[:n_train, :]
        test = values[n_train:, :]
        # split into input and outputs
        train_X, train_y = train[:, :-3], train[:, -3:]
        test_X, test_y = test[:, :-3], test[:, -3:]
        # reshape input to be 3D [samples, timesteps, features]
        train_X = train_X.reshape((train_X.shape[0], 12, 3))
        test_X = test_X.reshape((test_X.shape[0], 12, 3))
        # design network
        model = Sequential()
        model.add(LSTM(50, input_shape=(train_X.shape[1], train_X.shape[2])))
        model.add(Dense(3))
        model.compile(loss='mae', optimizer='adam')
        # fit network
        history = model.fit(train_X, train_y, epochs=300, batch_size=72,
                            validation_data=(test_X, test_y), verbose=2,
                            shuffle=False)
        output_filename = 'lstm_' + str(filename_data_.split('.')[0]) + '.h5'
        model.save('../../data/lstm_model/' + output_filename)
        print('lstm model saved in file {}'.format(output_filename))

    def predict(self, inputs):
        time_str = time.strftime('%Y-%m-%d', time.localtime(time.time()))
        year_str = time_str.split('-')[0][-2:]
        month_str = time_str.split('-')[1]
        inv_yhat = np.zeros(1)
        # search backwards for the most recent saved model
        for month in range(int(month_str), 0, -1):
            for day in range(31, 0, -1):
                month = '0' + str(month) if len(str(month)) == 1 else str(month)
                day = '0' + str(day) if len(str(day)) == 1 else str(day)
                filename_model = 'lstm_15min_' + year_str + month + day + '.h5'
                if os.path.exists('../../data/lstm_model/' + filename_model):
                    model = load_model('../../data/lstm_model/' + filename_model)
                    # reuse the scaler fitted at training time (previously the
                    # scaler was re-fitted here from that day's csv)
                    scaler = joblib.load('../../data/scaler/scaler.save')
                    # input is 12 timesteps x 3 features
                    yhat = model.predict(scaler.transform(inputs).reshape(1, 12, 3))
                    inv_yhat = np.array(scaler.inverse_transform(yhat))
                    break
            if np.sum(abs(inv_yhat)) > 0 and np.sum(abs(inv_yhat[0])) > 0:
                break
        return inv_yhat
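
# --- Illustrative sketch (not part of the original source): what
# _series_to_supervised produces for a 3-variable series with n_in=2, n_out=1.
# Each row pairs the previous timesteps of (To, Ho, So) with the current one,
# the same framing that feeds the 12-step x 3-feature LSTM above. Uses the
# module-level imports already present in this file.
#
# demo = np.arange(15, dtype='float32').reshape(5, 3)   # 5 timesteps, 3 vars
# framed = Prediction.__new__(Prediction)._series_to_supervised(demo, n_in=2, n_out=1)
# print(framed.columns.tolist())
# # ['var1(t-2)', 'var2(t-2)', 'var3(t-2)', 'var1(t-1)', ..., 'var3(t)']
# print(framed.shape)   # (3, 9): 5 rows minus the 2 NaN rows created by the shifts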
from Data_interaction import DataInteraction as DI
import time, os
import warnings
warnings.filterwarnings("ignore")


def _get_time_string():
    return time.strftime('%Y-%m-%d-%H-%M', time.localtime(time.time()))


on = int(input('Please input 1 to turn on weather data collection:'))
timeStep_forcast = 0
timeStamp = int(time.time())
while on == 1:
    time_min = int(_get_time_string().split('-')[4])
    # pull the 6h forecast at most once per hour, in the first minute of the hour
    if (time_min > 0 and time_min < 2
            and int(time.time()) >= timeStep_forcast * 3600 + timeStamp):
        df_forecast = DI().getWeatherForecast()
        df_forecast.to_csv('../../data/caiyun/6h_forcast_cy.csv',
                           mode='a', header=False, index=False)
        timeStep_forcast += 1
        print(_get_time_string() + ' :df_forecast data collected')
    # pull realtime weather every 30 seconds
    df_realtime = DI().getWeatherRealtime()
    df_realtime.to_csv('../../data/caiyun/30sec_realtime_cy.csv',
                       mode='a', header=False, index=False)
    print(_get_time_string() + ' :df_realtime data collected')
    time.sleep(30)
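
# --- Hypothetical sketch (not part of the original script): the forecast
# throttle above fires at most once per hour because the next allowed pull
# time advances by 3600 s per pull already made. A minimal standalone model
# of that predicate, with an artificial clock value `now`:

def forecast_due(now, start, pulls_so_far, minute):
    """Mirror of the script's condition: the minute must be 1 (0 < m < 2)
    and a full hour must have elapsed per pull already made."""
    return 0 < minute < 2 and now >= pulls_so_far * 3600 + start

# e.g. with start=0: the first pull is allowed at minute 1 of hour 0,
# the second no earlier than now=3600, i.e. minute 1 of the next hour.
assert forecast_due(now=60, start=0, pulls_so_far=0, minute=1)
assert not forecast_due(now=120, start=0, pulls_so_far=1, minute=1)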
import numpy as np
# DataInteraction, Prediction and Regression are imported from their modules
# elsewhere in this repo


class env(object):
    def __init__(self):
        self.n_features = 13  # 7 sensor/prediction values + 6 caiyun horizons
        self.DI = DataInteraction()
        self.Pre = Prediction()
        self.Reg = Regression()
        self.n_actions = 5  # self.DI.getActionCount()

    # **
    #  * @dev advance the environment by one control step: push fresh 15-min
    #  *      and 1-3h outdoor predictions, assemble the observation, and
    #  *      score it with a comfort + energy reward
    #  * @pram id The record id used when pushing data
    #  * @pram step The current step index (drives the done flag)
    #  * @pram data The latest sensor rows
    #  * @pram col_tem, col_hum, col_tem_out, col_hum_out, col_sol_out,
    #  *       col_tem_pre, col_hum_pre, col_sol_pre, col_action The columns of variables
    #  * @pram action_output The action chosen at the previous step
    #  * @return observation_, reward, done, env_for_ac_choose
    def step(self, id, step, data, col_tem, col_hum, col_tem_out, col_hum_out,
             col_sol_out, col_tem_pre, col_hum_pre, col_sol_pre, col_action,
             action_output):
        # predict outdoor temperature & humidity 15 min ahead
        pre_15min = self.Pre.get_res()
        pre_tem_15min = pre_15min[0]
        pre_hum_15min = pre_15min[1]
        self.DI.pushData(id, pre_tem_15min, 'tem_pre')
        self.DI.pushData(id, pre_hum_15min, 'hum_pre')
        print('15min prediction data get and tem is {}, hum is {}\n'.format(
            pre_tem_15min, pre_hum_15min))
        # get caiyun api realtime and 3h prediction data
        caiyun_data = self.Reg.get_res()
        # caiyun_rl = caiyun_data[0].reshape([2]).tolist()
        caiyun_3h = caiyun_data[1][:3, :].reshape([6]).tolist()
        self.DI.pushData(id, caiyun_3h[0], 'T_1h')
        self.DI.pushData(id, caiyun_3h[1], 'H_1h')
        self.DI.pushData(id, caiyun_3h[2], 'T_2h')
        self.DI.pushData(id, caiyun_3h[3], 'H_2h')
        self.DI.pushData(id, caiyun_3h[4], 'T_3h')
        self.DI.pushData(id, caiyun_3h[5], 'H_3h')
        print('caiyun 3h prediction data get and tem is {}..., hum is {}...\n'
              .format(caiyun_3h[0], caiyun_3h[1]))
        # initial variables of observation
        s_tem = data[0][col_tem]
        s_hum = data[0][col_hum]
        s_tem_out = data[0][col_tem_out]
        s_hum_out = data[0][col_hum_out]
        s_sol_out = data[0][col_sol_out]
        env_for_ac_choose = np.array([
            s_tem, s_tem_out, pre_tem_15min, caiyun_3h[0], caiyun_3h[2],
            caiyun_3h[4], action_output
        ])
        observation_ = [s_tem, s_hum, s_tem_out, s_hum_out, s_sol_out,
                        pre_tem_15min, pre_hum_15min] + caiyun_3h
        # reward: penalize indoor temperature above the 25.5 C setpoint
        # plus normalized energy use
        reward_tem = s_tem - 25.5 if (s_tem - 25.5) > 0 else 0
        reward_ener = self.DI.cal_power(id)
        reward = -(reward_ener / 0.175 / 2 + reward_tem / 2 / 2)
        done = step >= 10000
        return observation_, reward, done, env_for_ac_choose
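
# --- Illustrative sketch (a standalone reproduction of the reward shaping in
# env.step above, not part of the original source): overheating beyond the
# 25.5 C setpoint and energy use are each halved twice and summed as a cost.

def hvac_reward(indoor_tem, power_kw):
    """Negative cost: overheat penalty plus normalized energy penalty."""
    overheat = max(indoor_tem - 25.5, 0.0)  # only penalize above the setpoint
    return -(power_kw / 0.175 / 2 + overheat / 2 / 2)

# A cool room with low power draw costs little; a hot room costs more.
print(hvac_reward(24.0, 0.05))  # -> -0.1428... (no overheat term)
print(hvac_reward(27.0, 0.20))  # -> -(0.5714... + 0.375) = -0.9464...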
import pandas as pd
import numpy as np
import matplotlib.pylab as plt
import xlrd
import openpyxl
from sklearn.datasets import load_boston
from sklearn.model_selection import train_test_split  # sklearn.cross_validation was removed
from sklearn.preprocessing import StandardScaler
from sklearn.svm import SVR
from sklearn.metrics import r2_score, mean_squared_error, mean_absolute_error
from Data_interaction import DataInteraction


class Prediction():
    def __init__(self):
        # was a bare local `DI = DataInteraction()`, unreachable from the
        # methods below
        self.DI = DataInteraction()

    def _train_prediction(self, timeStep, col, startId, num=16002):
        # load the data
        table = self.DI.getData(timeStep, col, startId, num)
        # data manipulation
        # nrows = table.nrows                # number of rows
        # c1 = arange(0, nrows, 1)           # list from 0 to the number of rows
        table.reverse()
        table = np.array(table)[:, col]
        # cols_list = table.col_values(x)
        # cols_array = np.array(cols)        # convert the list to an array for matrix operations
        # datamatrix[:, x] = cols_array      # store the data
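
# --- Minimal sketch (assumption: roughly where the unfinished
# _train_prediction above was heading, per the "@return linear_svr_y_predict"
# docstrings in this repo; synthetic data stands in for the getData output).

import numpy as np
from sklearn.preprocessing import StandardScaler
from sklearn.svm import SVR
from sklearn.metrics import mean_absolute_error

rng = np.random.RandomState(0)
X = rng.uniform(15, 35, size=(200, 1))           # e.g. outdoor temperature
y = 0.8 * X.ravel() + rng.normal(0, 0.5, 200)    # variable to be predicted

scaler_X, scaler_y = StandardScaler(), StandardScaler()
X_s = scaler_X.fit_transform(X)
y_s = scaler_y.fit_transform(y.reshape(-1, 1)).ravel()

linear_svr = SVR(kernel='linear')
linear_svr.fit(X_s[:150], y_s[:150])             # train on the first 150 rows
linear_svr_y_predict = scaler_y.inverse_transform(
    linear_svr.predict(X_s[150:]).reshape(-1, 1)).ravel()
print('MAE:', mean_absolute_error(y[150:], linear_svr_y_predict))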
import os
import time

import numpy as np
import pandas as pd
from pandas import read_csv
import xgboost as xgb
from sklearn.model_selection import StratifiedKFold

from Data_interaction import DataInteraction


class Regression():
    def __init__(self):
        self.params = {
            'eta': 0.1,
            'max_depth': 6,
            'subsample': 0.8,
            'colsample_bytree': 0.7,
            'alpha': 0.2,
            'objective': 'reg:linear',  # deprecated alias of 'reg:squarederror' in newer xgboost
            'eval_metric': 'mae',
            'silent': True,
            'nthread': -1,
            'scale_pos_weight': 1,  # the original key had a trailing space
        }
        self.DI = DataInteraction()

    def get_res(self):
        self.train_model()
        res_now, res_pre = self.predict()
        # output: temperature - col 0, humidity - col 1
        # print('regression outdoor indicators:{}\n'.format(res_now))
        # print('predicted 6 hours outdoor indicators:{}\n'.format(res_pre))
        return res_now, res_pre

    def train_model(self):
        time_str = time.strftime('%Y-%m-%d-%H-%M', time.localtime(time.time()))
        day_str = time_str.split('-')[2]
        hour_str = time_str.split('-')[3]
        min_str = time_str.split('-')[4]
        # retrain the regression models on days 2, 9, 16 and 23,
        # shortly after midnight (00:00-00:20)
        if (int(day_str) in [2, 9, 16, 23]) and (int(hour_str) == 0) and (int(min_str) <= 20):
            self._train_regression()

    # @dev load the xgboost model ensemble and roll the 1h prediction forward
    #      to the 2h and 3h horizons
    def predict(self):
        time_str = time.strftime('%Y-%m-%d-%H-%M', time.localtime(time.time()))
        year_str = time_str.split('-')[0][-2:]
        month_str = time_str.split('-')[1]
        df = self.DI.getDf(1000)
        data = df.iloc[:, [0, 1, 3, 4, 6]]
        data.columns = ['id', 'date', 'To', 'Ho', 'So']
        data.loc[:, 'timestamp'] = data.loc[:, 'date'].apply(lambda x: x.timestamp())
        data.loc[:, 'year'] = data.loc[:, 'date'].apply(lambda x: x.strftime('%Y')).astype('int')
        data.loc[:, 'month'] = data.loc[:, 'date'].apply(lambda x: x.strftime('%m')).astype('int')
        data.loc[:, 'day'] = data.loc[:, 'date'].apply(lambda x: x.strftime('%d')).astype('int')
        data.loc[:, 'hour'] = data.loc[:, 'date'].apply(lambda x: x.strftime('%H')).astype('int')
        data.loc[:, 'minute'] = data.loc[:, 'date'].apply(lambda x: x.strftime('%M')).astype('int')
        # keep one row per hour (minute == 0) and build 1h/2h lag columns
        rl_sql_1h = data[data['minute'] == 0].reset_index(drop=True).drop_duplicates(subset=['hour'])
        rl_sql_1h['To_1'] = rl_sql_1h['To'].shift(-1)
        rl_sql_1h['Ho_1'] = rl_sql_1h['Ho'].shift(-1)
        rl_sql_1h['So_1'] = rl_sql_1h['So'].shift(-1)
        rl_sql_1h['To_2'] = rl_sql_1h['To'].shift(-2)
        rl_sql_1h['Ho_2'] = rl_sql_1h['Ho'].shift(-2)
        rl_sql_1h['So_2'] = rl_sql_1h['So'].shift(-2)
        rl_sql_1h = rl_sql_1h.iloc[0:1].reset_index(drop=True)
        # latest caiyun 6h forecast row, split into the 1h/2h/3h horizons
        for_6h = pd.read_csv('../../data/caiyun/6h_forcast_cy.csv').iloc[-1:]
        for_1h = for_6h[['month', 'day', 'hour', 'To_forecast1', 'Ho_forecast1',
                         'Co_forecast1', 'So_forecast1']].reset_index(drop=True)
        for_2h = for_6h[['month', 'day', 'hour', 'To_forecast2', 'Ho_forecast2',
                         'Co_forecast2', 'So_forecast2']].reset_index(drop=True)
        for_3h = for_6h[['month', 'day', 'hour', 'To_forecast3', 'Ho_forecast3',
                         'Co_forecast3', 'So_forecast3']].reset_index(drop=True)
        # load the most recent ensemble (3 seeds x 3 folds) of To and Ho models
        model_To = []
        model_Ho = []
        for month in range(int(month_str), 0, -1):
            for day in range(31, 0, -1):
                month = '0' + str(month) if len(str(month)) == 1 else str(month)
                day = '0' + str(day) if len(str(day)) == 1 else str(day)
                filename_model = '1min_To_0_0_' + year_str + month + day + '.model'
                if os.path.exists('../../data/xgb_model/' + filename_model):
                    for i in range(3):
                        for j in range(3):
                            model_To.append(xgb.Booster(
                                model_file='../../data/xgb_model/1min_To_{}_{}_{}{}{}.model'
                                .format(i, j, year_str, month, day)))
                            model_Ho.append(xgb.Booster(
                                model_file='../../data/xgb_model/1min_Ho_{}_{}_{}{}{}.model'
                                .format(i, j, year_str, month, day)))
                    # stop at the most recent ensemble (the original kept
                    # scanning and folded every past ensemble into the average,
                    # presumably unintentionally)
                    break
            if len(model_To) > 0:
                break
        for_1h = pd.concat([for_1h,
                            rl_sql_1h[['To', 'So', 'Ho', 'To_1', 'So_1', 'Ho_1',
                                       'To_2', 'So_2', 'Ho_2']]], axis=1)
        output_now = np.zeros([1, 2])
        output_pre = np.zeros([3, 2])
        feature_cols = ['month', 'day', 'hour', 'To_forecast1', 'Ho_forecast1',
                        'Co_forecast1', 'So_forecast1', 'To', 'Ho', 'So',
                        'To_1', 'Ho_1', 'So_1', 'To_2', 'Ho_2', 'So_2']
        # 1h ahead: ensemble average over all loaded boosters
        input_1 = xgb.DMatrix(for_1h[feature_cols])
        for i in range(len(model_To)):
            output_pre[0, 0] += model_To[i].predict(input_1)[0] / len(model_To)
        for i in range(len(model_Ho)):
            output_pre[0, 1] += model_Ho[i].predict(input_1)[0] / len(model_Ho)
        # 2h ahead: feed the 1h prediction back in as the current reading
        for_2h['To'] = output_pre[0, 0]
        for_2h['Ho'] = output_pre[0, 1]
        for_2h['So'] = for_2h['So_forecast2'] / (for_1h['So_forecast1'] + 1) * for_1h['So']
        for_2h['To_1'] = for_1h['To']
        for_2h['Ho_1'] = for_1h['Ho']
        for_2h['So_1'] = for_1h['So']
        for_2h['To_2'] = for_1h['To_1']
        for_2h['Ho_2'] = for_1h['Ho_1']
        for_2h['So_2'] = for_1h['So_1']
        for_2h = for_2h.rename(columns=lambda x: x.replace('st2', 'st1'))
        input_2 = xgb.DMatrix(for_2h[feature_cols])
        for i in range(len(model_To)):
            output_pre[1, 0] += model_To[i].predict(input_2)[0] / len(model_To)
        for i in range(len(model_Ho)):
            output_pre[1, 1] += model_Ho[i].predict(input_2)[0] / len(model_Ho)
        # 3h ahead: roll forward once more
        for_3h['To'] = output_pre[1, 0]
        for_3h['Ho'] = output_pre[1, 1]
        for_3h['So'] = for_3h['So_forecast3'] / (for_2h['So_forecast1'] + 1) * for_2h['So']
        for_3h['To_1'] = for_2h['To']
        for_3h['Ho_1'] = for_2h['Ho']
        for_3h['So_1'] = for_2h['So']
        for_3h['To_2'] = for_2h['To_1']
        for_3h['Ho_2'] = for_2h['Ho_1']
        for_3h['So_2'] = for_2h['So_1']
        for_3h = for_3h.rename(columns=lambda x: x.replace('st3', 'st1'))
        input_3 = xgb.DMatrix(for_3h[feature_cols])
        for i in range(len(model_To)):
            output_pre[2, 0] += model_To[i].predict(input_3)[0] / len(model_To)
        for i in range(len(model_Ho)):
            output_pre[2, 1] += model_Ho[i].predict(input_3)[0] / len(model_Ho)
        return output_now, output_pre

    def _train_xgb_model(self, trainset, label, en_amount=3, NFOLDS=3):
        time_str = time.strftime('%Y-%m-%d-%H-%M', time.localtime(time.time()))
        year_str = time_str.split('-')[0][-2:]
        month_str = time_str.split('-')[1]
        day_str = time_str.split('-')[2]
        for seed in range(en_amount):
            train_data_use = trainset.drop(columns=[label]).reset_index(drop=True)
            train_label = trainset[label].reset_index(drop=True)
            # integer-cast labels so StratifiedKFold can stratify a continuous target
            train_label_index = train_label.astype('int')
            kfold = StratifiedKFold(n_splits=NFOLDS, shuffle=True, random_state=seed)
            kf = kfold.split(train_data_use, train_label_index)
            for i, (trn_idx, val_idx) in enumerate(kf):
                trn_data = xgb.DMatrix(train_data_use.iloc[trn_idx, :], train_label[trn_idx])
                val_data = xgb.DMatrix(train_data_use.iloc[val_idx, :], train_label[val_idx])
                watchlist = [(trn_data, 'train'), (val_data, 'valid_data')]
                clf = xgb.train(dtrain=trn_data, num_boost_round=10000,
                                evals=watchlist, early_stopping_rounds=200,
                                verbose_eval=1000, params=self.params)
                clf.save_model('../../data/xgb_model/1min_{}_{}_{}_{}{}{}.model'.format(
                    label, seed, i, year_str, month_str, day_str))
                print('xgb model saved..\n')

    # **
    #  * @dev build the 1-min training set from the caiyun realtime log and the
    #  *      env csv, then train the To and Ho xgboost ensembles
    #  * @return None; models are saved under ../../data/xgb_model/
    #  *
    def _train_regression(self):
        time_str = time.strftime('%Y-%m-%d', time.localtime(time.time()))
        year_str = time_str.split('-')[0][-2:]
        month_str = time_str.split('-')[1]
        filename_data_ = ""
        dataset = pd.DataFrame()
        # search backwards for the most recent daily csv file
        for month in range(int(month_str), 0, -1):
            for day in range(31, 0, -1):
                month = '0' + str(month) if len(str(month)) == 1 else str(month)
                day = '0' + str(day) if len(str(day)) == 1 else str(day)
                filename_data = '1min_' + year_str + month + day + '.csv'
                try:
                    dataset = read_csv('../../data/env/' + filename_data, index_col=0)
                    filename_data_ = filename_data
                    break
                except Exception:  # no file for that date
                    pass
            if len(dataset) > 0:
                break
        df_1min_sql = dataset[['To', 'Ho', 'So', 'year', 'month', 'day', 'hour', 'minute']]
        rl = pd.read_csv('../../data/caiyun/30sec_realtime_cy.csv')
        # keep the most recent rows (the original sliced .iloc[:, -23000:],
        # i.e. columns, by mistake; the minute % 1 == 0 filter was a no-op)
        rl_1min = rl.iloc[-23000:, :].reset_index(drop=True)
        # thin the 30s log to one row per minute
        rows = [x for x in range(len(rl_1min) - 1)
                if rl_1min.iloc[x]['minute'] == rl_1min.iloc[x + 1]['minute']]
        rl_1min.drop(rows, inplace=True)
        rl_1min = rl_1min.reset_index(drop=True)  # the original discarded this return value
        rl_sql_1min = rl_1min.merge(df_1min_sql, how='left',
                                    on=['year', 'month', 'day', 'hour', 'minute'])
        rl_sql_1min.dropna(inplace=True)
        # build 1h/2h/3h lag frames keyed on a minute-resolution timestamp
        df_for_merge_1h = rl_sql_1min[['timestamp', 'To', 'Ho', 'So']].rename(
            columns={'To': 'To_1', 'Ho': 'Ho_1', 'So': 'So_1'})
        df_for_merge_1h['timestamp'] = ((df_for_merge_1h['timestamp'] + 3600) / 60).astype('int')
        df_for_merge_2h = rl_sql_1min[['timestamp', 'To', 'Ho', 'So']].rename(
            columns={'To': 'To_2', 'Ho': 'Ho_2', 'So': 'So_2'})
        df_for_merge_2h['timestamp'] = ((df_for_merge_2h['timestamp'] + 3600 * 2) / 60).astype('int')
        df_for_merge_3h = rl_sql_1min[['timestamp', 'To', 'Ho', 'So']].rename(
            columns={'To': 'To_3', 'Ho': 'Ho_3', 'So': 'So_3'})
        df_for_merge_3h['timestamp'] = ((df_for_merge_3h['timestamp'] + 3600 * 3) / 60).astype('int')
        rl_sql_1min['timestamp'] = (rl_sql_1min['timestamp'] / 60).astype('int')
        rl_sql_1min = pd.merge(rl_sql_1min, df_for_merge_1h, on=['timestamp'], how='left')
        rl_sql_1min = pd.merge(rl_sql_1min, df_for_merge_2h, on=['timestamp'], how='left')
        rl_sql_1min = pd.merge(rl_sql_1min, df_for_merge_3h, on=['timestamp'], how='left')
        rl_sql_1min.dropna(inplace=True)
        feature_cols = ['month', 'day', 'hour', 'To_cy', 'Ho_cy', 'Co_cy', 'So_cy',
                        'To_1', 'Ho_1', 'So_1', 'To_2', 'Ho_2', 'So_2',
                        'To_3', 'Ho_3', 'So_3']
        dataset_To = rl_sql_1min[feature_cols + ['To']]
        dataset_Ho = rl_sql_1min[feature_cols + ['Ho']]
        self._train_xgb_model(dataset_To, label='To')
        print('Tem xgb model trained..\n')
        self._train_xgb_model(dataset_Ho, label='Ho')
        print('Hum xgb model trained..\n')
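
# --- Minimal sketch (assumption: synthetic data and default-ish params; plain
# KFold stands in for the StratifiedKFold-on-int-cast-labels trick). It
# illustrates the seed x fold bagging scheme that _train_xgb_model and
# predict() implement: train NFOLDS boosters per seed, then average every
# booster's prediction at inference time.

import numpy as np
import xgboost as xgb
from sklearn.model_selection import KFold

rng = np.random.RandomState(0)
X = rng.uniform(size=(300, 4))
y = X @ np.array([2.0, -1.0, 0.5, 0.0]) + rng.normal(0, 0.1, 300)

boosters = []
for seed in range(3):                                  # en_amount
    for trn_idx, _ in KFold(3, shuffle=True, random_state=seed).split(X):
        dtrain = xgb.DMatrix(X[trn_idx], label=y[trn_idx])
        boosters.append(xgb.train({'eta': 0.1, 'max_depth': 3}, dtrain,
                                  num_boost_round=50))

dtest = xgb.DMatrix(X[:5])
pred = sum(b.predict(dtest) for b in boosters) / len(boosters)
print(pred, y[:5])  # ensemble mean should track the true targets closely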