Example #1
    def __init__(self):

        self.n_features = 13
        self.DI = DataInteraction()
        self.Pre = Prediction()
        self.Reg = Regression()
        self.n_actions = 5  #self.DI.getActionCount()
Example #2
# Assumed imports for this example (not shown in the original snippet);
# on older scikit-learn versions joblib may come from sklearn.externals.
import os
import time
import joblib
import numpy as np
import pandas as pd
from pandas import concat, read_csv
from sklearn.preprocessing import MinMaxScaler
from keras.models import Sequential, load_model
from keras.layers import Dense, LSTM
from Data_interaction import DataInteraction


class Prediction():
	def __init__(self):
		self.DI = DataInteraction()

	def get_res(self):
		self.train_model()
		model_input = self._getTrainData()
		res = self.predict(model_input)
		# print('predicted 15min outdoor indicators:{}\n'.format(res[0,:-1]))
		return res[0,:-1]

	def train_model(self):
		time_str = time.strftime('%Y-%m-%d-%H-%M',time.localtime(time.time()))
		year_str = time_str.split('-')[0][-2:]
		month_str = time_str.split('-')[1]
		day_str = time_str.split('-')[2]
		hour_str = time_str.split('-')[3]
		min_str = time_str.split('-')[4]
		# on days 2, 9, 16, 23 and 26, retrain the prediction model
		if (int(day_str) in [2,9,16,23,26]) and (int(hour_str) == 23) and (int(min_str)<=40):			
			self._train_prediction()

	# @dev transform the latest readings into 15-min intervals
	def _getTrainData(self):
		raw = self.DI.getDf(1500)
		data = raw.iloc[:,[0,1,3,4,6,8,9,24]]
		data.columns = ['timestamp', 'date','To','Ho','So','Ti','Hi','CTL_1']
		data.loc[:,'timestamp'] = data.loc[:,'date'].apply(lambda x: x.timestamp())
		readings = data.loc[:,['timestamp', 'To', 'Ho','So']].values.tolist()
		timestamp_last = readings[0][0]
		# print(readings)
		output = []
		n_row = 0
		# step back in 900 s (15-min) increments, keeping the first reading at or
		# before each target time; yields 12 rows of [To, Ho, So], newest first
		for i in range(12):
			while True:
				if readings[n_row][0] <= timestamp_last - 900*i:
					output.append(readings[n_row][1:])
					break
				n_row += 1
		output = np.array(output)
		# print(output)
		return output

	def _series_to_supervised(self, data, n_in=1, n_out=1, dropnan=True):
		n_vars = 1 if type(data) is list else data.shape[1]
		df = pd.DataFrame(data)
		cols, names = list(), list()
		# input sequence (t-n, ... t-1)
		for i in range(n_in, 0, -1):
			cols.append(df.shift(i))
			names += [('var%d(t-%d)' % (j+1, i)) for j in range(n_vars)]
		# forecast sequence (t, t+1, ... t+n)
		for i in range(0, n_out):
			cols.append(df.shift(-i))
			if i == 0:
				names += [('var%d(t)' % (j+1)) for j in range(n_vars)]
			else:
				names += [('var%d(t+%d)' % (j+1, i)) for j in range(n_vars)]
		# put it all together
		agg = concat(cols, axis=1)
		agg.columns = names
		# drop rows with NaN values
		if dropnan:
			agg.dropna(inplace=True)
		return agg
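
	# Illustrative note: with n_in=12, n_out=1 on a 3-column series, the reframed
	# frame has 36 input columns, var1(t-12) ... var3(t-1), followed by the 3
	# target columns var1(t), var2(t), var3(t).
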
	# **
	# * @dev train an LSTM on recent 15-min data to predict the next
	# *      [To, Ho, So] step from the previous 12, and save the model to disk
	# **
	def _train_prediction(self):
		time_str = time.strftime('%Y-%m-%d',time.localtime(time.time()))
		year_str = time_str.split('-')[0][-2:]
		month_str = time_str.split('-')[1]
		day_str = time_str.split('-')[2]
		filename_data_ = ""
		dataset = pd.DataFrame()
		for month in range(int(month_str), 0, -1):
			for day in range(31, 0, -1):
				month = '0' + str(month) if len(str(month)) == 1 else str(month)
				day = '0' + str(day) if len(str(day)) == 1 else str(day)
				filename_data = '15min_' + year_str + month + day +'.csv' 
				# print(filename_data)
				try:	
					dataset = read_csv('../../data/env/'+ filename_data,  index_col=0)
					filename_data_ = filename_data
					break
				except FileNotFoundError:
					pass
			if len(dataset) > 0:
				break
		
		dataset = dataset[['To','Ho','So']].iloc[0:10000,:]

		# reverse to chronological order and cast to float32
		values = dataset.values[::-1].astype('float32')

		scaler = MinMaxScaler(feature_range=(0, 1))
		scaler_filename = '../../data/scaler/scaler.save'
		
		scaled = scaler.fit_transform(values)
		joblib.dump(scaler, scaler_filename)

		reframed = self._series_to_supervised(scaled, n_in = 12, n_out = 1 )
		# split into train and test sets
		values = reframed.values[:,:]

		n_train = int(0.9 * len(values))
		train = values[:n_train, :]
		test = values[n_train:, :]
		# # split into input and outputs
		train_X, train_y = train[:, :-3], train[:, -3:]
		test_X, test_y = test[:, :-3], test[:, -3:]

		# # reshape input to be 3D [samples, timesteps, features]
		train_X = train_X.reshape((train_X.shape[0], 12, 3))
		test_X = test_X.reshape((test_X.shape[0], 12, 3)) 
		# design network
		model = Sequential()
		model.add(LSTM(50, input_shape=(train_X.shape[1], train_X.shape[2])))
		model.add(Dense(3))
		model.compile(loss='mae', optimizer='adam')
		# fit network
		history = model.fit(train_X, train_y, epochs=300, batch_size=72, validation_data=(test_X, test_y), verbose=2, shuffle=False)
		# print(filename_data)
		output_filename = 'lstm_'+str(filename_data_.split('.')[0])+'.h5'
		# print(output_filename)
		model.save("../../data/lstm_model/" + output_filename)
		print('lstm model saved in file {}'.format(output_filename))
	
	def predict(self, model_input):
		time_str = time.strftime('%Y-%m-%d',time.localtime(time.time()))
		year_str = time_str.split('-')[0][-2:]
		month_str = time_str.split('-')[1]
		day_str = time_str.split('-')[2]
		inv_yhat = np.zeros(1)
		for month in range(int(month_str), 0, -1):
			for day in range(31, 0, -1):
				month = '0' + str(month) if len(str(month)) == 1 else str(month)
				day = '0' + str(day) if len(str(day)) == 1 else str(day)
				filename_model = 'lstm_15min_' + year_str + month + day +'.h5'
				
				filename_data = '15min_' + year_str + month + day +'.csv'
				# print(filename_model, filename_data)
				if os.path.exists("../../data/lstm_model/"+filename_model):
					model = load_model("../../data/lstm_model/"+filename_model)
					# get scaler
					# dataset = read_csv('../../data/env/'+filename_data,  index_col=0)
					# dataset = dataset[['To','Ho','So']]
					# values = dataset.values
					# values = list(values)
					# values.reverse()
					# values = np.array(values)
					# values = values.astype('float32')
					# print(values)
					scaler = joblib.load("../../data/scaler/scaler.save")
					# scaler = MinMaxScaler(feature_range=(0, 1))
					# scaler.fit_transform(values)

					# model_input is 12x3: scale it, then reshape to (1, 12, 3) for the LSTM
					yhat = model.predict(scaler.transform(model_input).reshape(1,12,3))
					# print(yhat)
					inv_yhat = np.array(scaler.inverse_transform(yhat))
					break
			print(inv_yhat)
			if np.sum(abs(inv_yhat))>0 and np.sum(abs(inv_yhat[0]))>0:
				break 

		return inv_yhat
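
A minimal usage sketch for the Prediction class above (hypothetical: it assumes DataInteraction is importable and that a trained lstm_15min_*.h5 model and the saved scaler already exist under ../../data/):

if __name__ == '__main__':
	pre = Prediction()
	# retrains on the scheduled days, then returns the predicted [To, Ho] pair
	to_pred, ho_pred = pre.get_res()
	print('15-min ahead outdoor prediction: To={:.1f}, Ho={:.1f}'.format(to_pred, ho_pred))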
Example #5
from Data_interaction import DataInteraction as DI 
import time, os
import warnings
warnings.filterwarnings("ignore")

def _get_time_string():
    return time.strftime('%Y-%m-%d-%H-%M', time.localtime(time.time()))

on = int(input('Please input 1 to turn on weather data collection:'))
timeStep_forcast = 0
timeStamp = int(time.time())
while (on==1):
    time_min = int(_get_time_string().split('-')[4])
    
    if (time_min>0 and time_min<2 and int(time.time())>=timeStep_forcast*3600+timeStamp):
        df_forecast = DI().getWeatherForecast()
        df_forecast.to_csv('../../data/caiyun/6h_forcast_cy.csv', mode='a', header=False, index=False)
        timeStep_forcast += 1 
        print(_get_time_string() + ' :df_forecast data collected' )
    
    df_realtime = DI().getWeatherRealtime()
    df_realtime.to_csv('../../data/caiyun/30sec_realtime_cy.csv', mode='a', header=False, index=False)
    print(_get_time_string() + ' :df_realtime data collected')
    time.sleep(30)
    
    
Example #6
# Assumed imports for this example; the module paths for Prediction and
# Regression are hypothetical, as they are not shown in the original snippet.
import numpy as np
from Data_interaction import DataInteraction
from prediction import Prediction
from regression import Regression


class env(object):
    def __init__(self):

        self.n_features = 13
        self.DI = DataInteraction()
        self.Pre = Prediction()
        self.Reg = Regression()
        self.n_actions = 5  #self.DI.getActionCount()

    # **
    # * @dev run one environment step: push fresh 15-min and 3-hour predictions,
    # *      build the observation vector and compute the reward
    # * @param id The record id used when pushing data
    # * @param step The current step count; done becomes True after 10000 steps
    # * @param data The latest sensor rows
    # * @param col_tem, col_hum, col_tem_out, col_hum_out, col_sol_out,
    # *        col_tem_pre, col_hum_pre, col_sol_pre, col_action The columns of the variables
    # * @param action_output The previous action, included in env_for_ac_choose
    # * @return observation_, reward, done, env_for_ac_choose
    # **
    def step(self, id, step, data, col_tem, col_hum, col_tem_out, col_hum_out,
             col_sol_out, col_tem_pre, col_hum_pre, col_sol_pre, col_action,
             action_output):

        # predict tem&hum outside
        pre_15min = self.Pre.get_res()
        pre_tem_15min = pre_15min[0]
        pre_hum_15min = pre_15min[1]
        self.DI.pushData(id, pre_tem_15min, 'tem_pre')
        self.DI.pushData(id, pre_hum_15min, 'hum_pre')
        print('15min prediction data received: tem is {}, hum is {}\n'.format(
            pre_tem_15min, pre_hum_15min))
        # get caiyun api realtime and 3h prediction data
        caiyun_data = self.Reg.get_res()
        # caiyun_rl = caiyun_data[0].reshape([2]).tolist()
        caiyun_3h = caiyun_data[1][:3, :].reshape([6]).tolist()
        self.DI.pushData(id, caiyun_3h[0], 'T_1h')
        self.DI.pushData(id, caiyun_3h[1], 'H_1h')
        self.DI.pushData(id, caiyun_3h[2], 'T_2h')
        self.DI.pushData(id, caiyun_3h[3], 'H_2h')
        self.DI.pushData(id, caiyun_3h[4], 'T_3h')
        self.DI.pushData(id, caiyun_3h[5], 'H_3h')
        print(
            'caiyun 3h prediction data received: tem is {}... , hum is {}...\n'.
            format(caiyun_3h[0], caiyun_3h[1]))
        # initial variables of observation
        s_tem = data[0][col_tem]
        s_hum = data[0][col_hum]
        s_tem_out = data[0][col_tem_out]
        s_hum_out = data[0][col_hum_out]
        s_sol_out = data[0][col_sol_out]

        env_for_ac_choose = np.array([
            s_tem, s_tem_out, pre_tem_15min, caiyun_3h[0], caiyun_3h[2],
            caiyun_3h[4], action_output
        ])

        observation_ = [s_tem,s_hum,s_tem_out,s_hum_out,s_sol_out,\
            pre_tem_15min,pre_hum_15min] + caiyun_3h
        # print('observation data generated and length is {}'.format(len(observation_)))
        reward_tem = s_tem - 25.5 if (s_tem - 25.5) > 0 else 0
        reward_ener = self.DI.cal_power(id)
        reward = -(reward_ener / 0.175 / 2 + reward_tem / 2 / 2)

        done = step >= 10000

        return observation_, reward, done, env_for_ac_choose
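
The reward above mixes an energy term with an over-temperature penalty. A small illustrative check of the formula (the 0.175 and 25.5 constants come from the code; the sample readings are made up):

# Illustrative check of the reward formula used in step(); sample values are made up.
def reward_example(power=0.14, indoor_tem=26.5):
    reward_tem = max(indoor_tem - 25.5, 0)  # penalize only temperatures above 25.5
    return -(power / 0.175 / 2 + reward_tem / 2 / 2)

print(reward_example())  # -(0.4 + 0.25) = -0.65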
Example #7
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import xlrd
import openpyxl
from sklearn.datasets import load_boston
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.svm import SVR
from sklearn.metrics import r2_score, mean_squared_error, mean_absolute_error
from Data_interaction import DataInteraction 


class Prediction():
	def __init__(self):
		self.DI = DataInteraction()

	def _train_prediction(self, timeStep, col, startId, num=16002):
		# load the data
		table = self.DI.getData(timeStep, col, startId, num)

	# # data manipulation
	# 	nrows = table.nrows  # number of rows
	# 	#c1=arange(0,nrows,1)  # list from 0 to nrows
		table.reverse()
		table = np.array(table)[:,col]

	# 	cols_list = table.col_values(x)
	# 	cols_array = np.array(cols)  # convert the list to an array for matrix operations
	# 	datamatrix[:,x] = cols_array  # store the data
Example #8
# Assumed imports for this example (not shown in the original snippet)
import os
import time
import numpy as np
import pandas as pd
import xgboost as xgb
from pandas import read_csv
from sklearn.model_selection import StratifiedKFold
from Data_interaction import DataInteraction


class Regression():
    def __init__(self):
        self.params = {
            'eta': 0.1,
            'max_depth': 6,
            'subsample': 0.8,
            'colsample_bytree': 0.7,
            'alpha': 0.2,
            'objective': 'reg:linear',  # renamed to 'reg:squarederror' in newer xgboost
            'eval_metric': 'mae',
            'silent': True,
            'nthread': -1,
            'scale_pos_weight': 1,
        }
        self.DI = DataInteraction()

    def get_res(self):
        self.train_model()
        res_now, res_pre = self.predict()  # output: temperature - col 0, humidity - col 1
        # print('regression outdoor indicators:{}\n'.format(res_now))
        # print('predicted 6 hours outdoor indicators:{}\n'.format(res_pre))
        return res_now, res_pre

    def train_model(self):
        time_str = time.strftime('%Y-%m-%d-%H-%M', time.localtime(time.time()))
        year_str = time_str.split('-')[0][-2:]
        month_str = time_str.split('-')[1]
        day_str = time_str.split('-')[2]
        hour_str = time_str.split('-')[3]
        min_str = time_str.split('-')[4]
        # on days 2, 9, 16 and 23, retrain the regression model
        if (int(day_str) in [
                2, 9, 16, 23
        ]) and (int(hour_str) == 0) and (int(min_str) <= 20):
            self._train_regression()


    # @dev load the saved xgboost models and generate output by averaging them
    def predict(self):
        time_str = time.strftime('%Y-%m-%d-%H-%M', time.localtime(time.time()))
        year_str = time_str.split('-')[0][-2:]
        month_str = time_str.split('-')[1]
        day_str = time_str.split('-')[2]

        df = self.DI.getDf(1000)
        data = df.iloc[:, [0, 1, 3, 4, 6]]
        data.columns = ['id', 'date', 'To', 'Ho', 'So']
        data.loc[:,
                 'timestamp'] = data.loc[:,
                                         'date'].apply(lambda x: x.timestamp())
        data.loc[:, 'year'] = data.loc[:, 'date'].apply(
            lambda x: x.strftime('%Y')).astype('int')
        data.loc[:, 'month'] = data.loc[:, 'date'].apply(
            lambda x: x.strftime('%m')).astype('int')
        data.loc[:, 'day'] = data.loc[:, 'date'].apply(
            lambda x: x.strftime('%d')).astype('int')
        data.loc[:, 'hour'] = data.loc[:, 'date'].apply(
            lambda x: x.strftime('%H')).astype('int')
        data.loc[:, 'minute'] = data.loc[:, 'date'].apply(
            lambda x: x.strftime('%M')).astype('int')
        # df_hour = data[data['minute']==0].reset_index(drop=True)
        rl_sql_1h = data[data['minute'] == 0].reset_index(
            drop=True).drop_duplicates(subset=['hour'])
        rl_sql_1h['To_1'] = rl_sql_1h['To'].shift(-1)
        rl_sql_1h['Ho_1'] = rl_sql_1h['Ho'].shift(-1)
        rl_sql_1h['So_1'] = rl_sql_1h['So'].shift(-1)
        rl_sql_1h['To_2'] = rl_sql_1h['To'].shift(-2)
        rl_sql_1h['Ho_2'] = rl_sql_1h['Ho'].shift(-2)
        rl_sql_1h['So_2'] = rl_sql_1h['So'].shift(-2)
        rl_sql_1h = rl_sql_1h.iloc[0:1].reset_index(drop=True)
        #         df_1min_sql = pd.read_csv('../../data/env/1min_190502.csv')[['To','Ho','So','year','month','day','hour','minute']]
        #         rl = pd.read_csv('../../data/caiyun/30sec_realtime_cy.csv')
        for_6h = pd.read_csv('../../data/caiyun/6h_forcast_cy.csv').iloc[-1:]

        for_1h = for_6h[[
            'month', 'day', 'hour', 'To_forecast1', 'Ho_forecast1',
            'Co_forecast1', 'So_forecast1'
        ]].reset_index(drop=True)
        for_2h = for_6h[[
            'month', 'day', 'hour', 'To_forecast2', 'Ho_forecast2',
            'Co_forecast2', 'So_forecast2'
        ]].reset_index(drop=True)
        for_3h = for_6h[[
            'month', 'day', 'hour', 'To_forecast3', 'Ho_forecast3',
            'Co_forecast3', 'So_forecast3'
        ]].reset_index(drop=True)

        model_To = []
        model_Ho = []
        for month in range(int(month_str), 0, -1):
            for day in range(31, 0, -1):
                month = '0' + str(month) if len(
                    str(month)) == 1 else str(month)
                day = '0' + str(day) if len(str(day)) == 1 else str(day)
                filename_model = '1min_To_0_0_' + year_str + month + day + '.model'
                if os.path.exists("../../data/xgb_model/" + filename_model):
                    for i in range(3):
                        for j in range(3):
                            model_To.append(
                                xgb.Booster(
                                    model_file=
                                    '../../data/xgb_model/1min_To_{}_{}_{}{}{}.model'
                                    .format(i, j, year_str, month, day)))
                            model_Ho.append(
                                xgb.Booster(
                                    model_file=
                                    '../../data/xgb_model/1min_Ho_{}_{}_{}{}{}.model'
                                    .format(i, j, year_str, month, day)))

        for_1h = pd.concat([
            for_1h, rl_sql_1h[[
                'To', 'So', 'Ho', 'To_1', 'So_1', 'Ho_1', 'To_2', 'So_2',
                'Ho_2'
            ]]
        ],
                           axis=1)

        output_now = np.zeros([1, 2])
        output_pre = np.zeros([3, 2])

        input_1 = xgb.DMatrix(for_1h[['month','day','hour','To_forecast1','Ho_forecast1','Co_forecast1',\
                                   'So_forecast1','To','Ho','So','To_1','Ho_1','So_1','To_2','Ho_2','So_2']])
        for i in range(len(model_To)):
            output_pre[0, 0] += model_To[i].predict(input_1) / len(model_To)
        for i in range(len(model_Ho)):
            output_pre[0, 1] += model_Ho[i].predict(input_1) / len(model_Ho)

        for_2h['To'] = output_pre[0, 0]
        for_2h['Ho'] = output_pre[0, 1]
        for_2h['So'] = for_2h['So_forecast2'] / (for_1h['So_forecast1'] +
                                                 1) * for_1h['So']
        for_2h['To_1'] = for_1h['To']
        for_2h['Ho_1'] = for_1h['Ho']
        for_2h['So_1'] = for_1h['So']

        for_2h['To_2'] = for_1h['To_1']
        for_2h['Ho_2'] = for_1h['Ho_1']
        for_2h['So_2'] = for_1h['So_1']
        for_2h = for_2h.rename(columns=lambda x: x.replace('st2', 'st1'))

        input_2 = xgb.DMatrix(for_2h[[
            'month', 'day', 'hour', 'To_forecast1', 'Ho_forecast1',
            'Co_forecast1', 'So_forecast1', 'To', 'Ho', 'So', 'To_1', 'Ho_1',
            'So_1', 'To_2', 'Ho_2', 'So_2'
        ]])
        for i in range(len(model_To)):
            output_pre[1, 0] += model_To[i].predict(input_2) / len(model_To)
        for i in range(len(model_Ho)):
            output_pre[1, 1] += model_Ho[i].predict(input_2) / len(model_Ho)

        for_3h['To'] = output_pre[1, 0]
        for_3h['Ho'] = output_pre[1, 1]
        for_3h['So'] = for_3h['So_forecast3'] / (for_2h['So_forecast1'] +
                                                 1) * for_2h['So']
        for_3h['To_1'] = for_2h['To']
        for_3h['Ho_1'] = for_2h['Ho']
        for_3h['So_1'] = for_2h['So']

        for_3h['To_2'] = for_2h['To_1']
        for_3h['Ho_2'] = for_2h['Ho_1']
        for_3h['So_2'] = for_2h['So_1']
        for_3h = for_3h.rename(columns=lambda x: x.replace('st3', 'st1'))

        input_3 = xgb.DMatrix(for_3h[[
            'month', 'day', 'hour', 'To_forecast1', 'Ho_forecast1',
            'Co_forecast1', 'So_forecast1', 'To', 'Ho', 'So', 'To_1', 'Ho_1',
            'So_1', 'To_2', 'Ho_2', 'So_2'
        ]])
        for i in range(len(model_To)):
            output_pre[2, 0] += model_To[i].predict(input_3) / len(model_To)
        for i in range(len(model_Ho)):
            output_pre[2, 1] += model_Ho[i].predict(input_3) / len(model_Ho)
        # output_now is left as zeros here; only output_pre is filled in
        return output_now, output_pre

    def _train_xgb_model(self, trainset, label, en_amount=3, NFOLDS=3):
        time_str = time.strftime('%Y-%m-%d-%H-%M', time.localtime(time.time()))
        year_str = time_str.split('-')[0][-2:]
        month_str = time_str.split('-')[1]
        day_str = time_str.split('-')[2]
        for seed in range(en_amount):
            train_data_use = trainset.drop(columns=[label]).reset_index(
                drop=True)
            train_label = trainset[label].reset_index(drop=True)
            train_label_index = train_label.astype('int')  # StratifiedKFold needs discrete labels, so bucket the target by int

            kfold = StratifiedKFold(n_splits=NFOLDS,
                                    shuffle=True,
                                    random_state=seed)
            kf = kfold.split(train_data_use, train_label_index)
            for i, (trn_idx, val_idx) in enumerate(kf):
                trn_data = xgb.DMatrix(train_data_use.iloc[trn_idx, :],
                                       train_label[trn_idx])
                val_data = xgb.DMatrix(train_data_use.iloc[val_idx, :],
                                       train_label[val_idx])
                watchlist = [(trn_data, 'train'), (val_data, 'valid_data')]
                clf = xgb.train(dtrain=trn_data,
                                num_boost_round=10000,
                                evals=watchlist,
                                early_stopping_rounds=200,
                                verbose_eval=1000,
                                params=self.params)
                clf.save_model(
                    '../../data/xgb_model/1min_{}_{}_{}_{}{}{}.model'.format(
                        label, seed, i, year_str, month_str, day_str))
        print('xgb model saved..\n')
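
    # Note: en_amount=3 seeds x NFOLDS=3 folds gives 9 saved models per label,
    # which is why predict() above loads a 3x3 grid of model files per matching
    # date for each of To and Ho.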

    # **
    # * @dev build 1-min training sets from the local CSVs and the caiyun
    # *      realtime log, then train the XGBoost models for To and Ho
    # **
    def _train_regression(self):
        time_str = time.strftime('%Y-%m-%d', time.localtime(time.time()))
        year_str = time_str.split('-')[0][-2:]
        month_str = time_str.split('-')[1]
        day_str = time_str.split('-')[2]
        filename_data_ = ""
        dataset = pd.DataFrame()
        for month in range(int(month_str), 0, -1):
            for day in range(31, 0, -1):
                month = '0' + str(month) if len(
                    str(month)) == 1 else str(month)
                day = '0' + str(day) if len(str(day)) == 1 else str(day)
                filename_data = '1min_' + year_str + month + day + '.csv'
                # print(filename_data)
                try:
                    dataset = read_csv('../../data/env/' + filename_data,
                                       index_col=0)
                    filename_data_ = filename_data
                    break
                except FileNotFoundError:
                    pass
            if len(dataset) > 0:
                break

        df_1min_sql = dataset[[
            'To', 'Ho', 'So', 'year', 'month', 'day', 'hour', 'minute'
        ]]
        rl = pd.read_csv('../../data/caiyun/30sec_realtime_cy.csv')
        # keep only the most recent rows of the 30-sec realtime log
        rl_1min = rl[rl['minute'] % 1 == 0].iloc[-23000:, :].reset_index(drop=True)
        rows = [
            x for x in range(len(rl_1min) - 1)
            if rl_1min.iloc[x]['minute'] == rl_1min.iloc[x + 1]['minute']
        ]
        rl_1min.drop(rows, inplace=True)
        rl_1min = rl_1min.reset_index(drop=True)
        rl_sql_1min = rl_1min.merge(
            df_1min_sql,
            how='left',
            on=['year', 'month', 'day', 'hour', 'minute'])
        rl_sql_1min.dropna(inplace=True)
        df_for_merge_1h = rl_sql_1min[['timestamp', 'To', 'Ho',
                                       'So']].rename(columns={
                                           'To': 'To_1',
                                           'Ho': 'Ho_1',
                                           'So': 'So_1'
                                       })
        df_for_merge_1h['timestamp'] = (df_for_merge_1h['timestamp'] +
                                        3600) / 60
        df_for_merge_1h['timestamp'] = df_for_merge_1h['timestamp'].astype(
            'int')

        df_for_merge_2h = rl_sql_1min[['timestamp', 'To', 'Ho',
                                       'So']].rename(columns={
                                           'To': 'To_2',
                                           'Ho': 'Ho_2',
                                           'So': 'So_2'
                                       })
        df_for_merge_2h['timestamp'] = (df_for_merge_2h['timestamp'] +
                                        3600 * 2) / 60
        df_for_merge_2h['timestamp'] = df_for_merge_2h['timestamp'].astype(
            'int')

        df_for_merge_3h = rl_sql_1min[['timestamp', 'To', 'Ho',
                                       'So']].rename(columns={
                                           'To': 'To_3',
                                           'Ho': 'Ho_3',
                                           'So': 'So_3'
                                       })
        df_for_merge_3h['timestamp'] = (df_for_merge_3h['timestamp'] +
                                        3600 * 3) / 60
        df_for_merge_3h['timestamp'] = df_for_merge_3h['timestamp'].astype(
            'int')

        rl_sql_1min['timestamp'] = (rl_sql_1min['timestamp']) / 60
        rl_sql_1min['timestamp'] = rl_sql_1min['timestamp'].astype('int')
        rl_sql_1min = pd.merge(rl_sql_1min,
                               df_for_merge_1h,
                               on=['timestamp'],
                               how='left')
        rl_sql_1min = pd.merge(rl_sql_1min,
                               df_for_merge_2h,
                               on=['timestamp'],
                               how='left')
        rl_sql_1min = pd.merge(rl_sql_1min,
                               df_for_merge_3h,
                               on=['timestamp'],
                               how='left')
        rl_sql_1min.dropna(inplace=True)

        dataset_To = rl_sql_1min[[
            'month', 'day', 'hour', 'To_cy', 'Ho_cy', 'Co_cy', 'So_cy', 'To_1',
            'Ho_1', 'So_1', 'To_2', 'Ho_2', 'So_2', 'To_3', 'Ho_3', 'So_3',
            'To'
        ]]
        dataset_Ho = rl_sql_1min[[
            'month', 'day', 'hour', 'To_cy', 'Ho_cy', 'Co_cy', 'So_cy', 'To_1',
            'Ho_1', 'So_1', 'To_2', 'Ho_2', 'So_2', 'To_3', 'Ho_3', 'So_3',
            'Ho'
        ]]
        self._train_xgb_model(dataset_To, label='To')
        print('Tem xgb model trained..\n')
        self._train_xgb_model(dataset_Ho, label='Ho')
        print('Hum xgb model trained..\n')
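
A minimal usage sketch for the Regression class above (hypothetical: it assumes DataInteraction is importable and that the caiyun CSV logs and trained xgb_model files exist under ../../data/):

if __name__ == '__main__':
    reg = Regression()
    res_now, res_pre = reg.get_res()
    # res_pre rows 0..2 hold the 1h/2h/3h-ahead [To, Ho] predictions
    for h, (to, ho) in enumerate(res_pre, start=1):
        print('{}h ahead: To={:.1f}, Ho={:.1f}'.format(h, to, ho))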