def __init__(self, dataset, horizion, executions=100, proportion=0.80, test_observations=0):
    """Build an ELM forecasting wrapper around ``dataset``.

    Grid-searches the best neuron amount, then splits the series and
    reshapes it into the (target, lags) matrices the ELM expects.

    Parameters
    ----------
    dataset : series-like with an ``.index`` attribute (e.g. pandas Series).
    horizion : int, forecast horizon (misspelled name kept for compatibility).
    executions : int, number of ELM runs later combined by the median.
    proportion : float, share of the series used for training.
    test_observations : int, fixed test-window size; 0 means "whole test split".
    """
    self.__index = dataset.index
    self.__dataset = np.array(dataset)
    self.__horizion = horizion
    self.__time_delay = 1
    self.__proportion = proportion
    self.__activation_function = ''  # chosen by the grid search below
    self.__C = 1                     # regularization constant, tuned by the grid search
    self.__test_observations = test_observations
    self.__qtd_executions = executions

    # Evaluation metrics, filled after fitting ("setp" spelling kept for
    # compatibility with external readers of these attributes).
    self.trainScoreMASE = 0
    self.trainScoreRMSE = 0
    self.trainScoreMAPE = 0
    self.testScoreMASE_one_step = 0
    self.testScoreRMSE_one_step = 0
    self.testScoreMAPE_one_step = 0
    self.testScoreMASE_mult_setp = 0
    self.testScoreRMSE_mult_setp = 0
    self.testScoreMAPE_mult_setp = 0

    # Prediction containers, filled after fitting.
    self.trainPredict = []
    self.testPredict = []
    self.predict_horizon = []
    self.trainPredict_Result = []
    self.testPredict_Result = []
    self.predict_horizon_Result = []
    self._runTime = None

    # Provisional split with time delay 1; the grid search may update
    # self.__time_delay, after which the split is redone below.
    self.__train, self.__test = Util_NN.split_train_and_test(
        self.__dataset, proportion, 1, self.__test_observations)

    # Grid search for the best neuron amount. Use +inf as the initial
    # "worst error" sentinel instead of an arbitrary large magic number.
    self.__best_neuron_amount = 1
    self.__best_error = float('inf')
    self._runTimeOpt = obj.ModelsRunTime('ELM-OPT')
    startTime = dt.datetime.now()
    self.__grid_search_optimization_function()
    self._runTimeOpt.setTrainingTime(dt.datetime.now() - startTime)
    # NOTE(review): mirrors training time as test time; the original code
    # marked this line "remove after tests" — confirm whether it should stay.
    self._runTimeOpt.setTestTime(self._runTimeOpt.getTrainingTime())

    # Final split using the (possibly updated) time delay.
    self.__train, self.__test = Util_NN.split_train_and_test(
        self.__dataset, proportion, self.__time_delay, self.__test_observations)

    # Lagged inputs/targets for training, then the single-matrix layout
    # the ELM library expects: column 0 = target, columns 1.. = lags.
    self.__trainX, self.__trainY = Util_NN.create_dataset(self.__train, self.__time_delay)
    self.__dataTrain = np.zeros((len(self.__train) - self.__time_delay, self.__time_delay + 1))
    self.__dataTrain[:, 1:(self.__time_delay + 1)] = self.__trainX
    self.__dataTrain[:, 0] = self.__trainY

    # Same reshaping for the test split.
    self.__testX, self.__testY = Util_NN.create_dataset(self.__test, self.__time_delay)
    self.__dataTest = np.zeros((len(self.__test) - self.__time_delay, self.__time_delay + 1))
    self.__dataTest[:, 1:(self.__time_delay + 1)] = self.__testX
    self.__dataTest[:, 0] = self.__testY

    # 0 means "use every available test observation".
    if self.__test_observations == 0:
        self.__test_observations = len(self.__testY)
def crostonFit(self):
    """Fit Croston's method, evaluate it and register the results."""
    model_name = 'CR'
    run_time = obj.ModelsRunTime(model_name)

    # Step 1: fit on the training split (timed) and on the full series;
    # the latter produces the real out-of-sample forecasts.
    fit_start = dt.datetime.now()
    self.fittedModel = cr.Croston(self.trainData)
    self.fittedModel.fit()
    run_time.setTrainingTime(dt.datetime.now() - fit_start)
    self.fittedModelFinal = cr.Croston(self.data)
    self.fittedModelFinal.fit()

    # Step 2: in-sample fit, test predictions (timed) and horizon forecasts.
    training_fit = pd.Series(np.ceil(self.fittedModel.fittedForecasts))
    test_start = dt.datetime.now()
    test_predictions = pd.Series(np.ceil(self.fittedModel.forecast(len(self.testData))))
    test_elapsed = dt.datetime.now() - test_start
    forecasts = pd.Series(np.ceil(self.fittedModelFinal.forecast(self.horizon)))

    # Step 3: error metrics, run times and result registration.
    error_objs = self.setErrorData(training_fit, test_predictions, run_time)
    run_time.setTestTime(run_time.getTestTime() + test_elapsed)
    self.runTimeList.append(run_time)
    self.setModelResults(model_name, error_objs, training_fit, test_predictions, forecasts)
def __init__(self, dataset, horizion, activation_function, stopping_criterion='iterations', max_iterations=400, batch=32, proportion=0.80, optimizer='adam', test_observations=0):
    """Build an ANN forecasting wrapper around ``dataset``.

    Grid-searches the best neuron amount, then splits the series and
    builds the lagged input/target arrays used for training and testing.

    Parameters
    ----------
    dataset : series-like with an ``.index`` attribute (e.g. pandas Series).
    horizion : int, forecast horizon (misspelled name kept for compatibility).
    activation_function : activation used by the network's hidden layer.
    stopping_criterion : training stop rule; defaults to 'iterations'.
    max_iterations : cap on training iterations/epochs.
    batch : mini-batch size.
    proportion : float, share of the series used for training.
    optimizer : optimizer name forwarded to the training routine.
    test_observations : int, fixed test-window size; 0 means "whole test split".
    """
    self.__index = dataset.index
    self.__dataset = np.array(dataset)
    self.__max_iterations = max_iterations
    self.__batch = batch
    self.__horizion = horizion
    self.__time_delay = 1
    self.__stopping_criterion = stopping_criterion
    self.__proportion = proportion
    self.__optimizer = optimizer
    self.__activation_function = activation_function
    self.__test_observations = test_observations

    # Evaluation metrics, filled after fitting ("setp" spelling kept for
    # compatibility with external readers of these attributes).
    self.trainScoreMASE = 0
    self.trainScoreRMSE = 0
    self.trainScoreMAPE = 0
    self.testScoreMASE_one_step = 0
    self.testScoreRMSE_one_step = 0
    self.testScoreMAPE_one_step = 0
    self.testScoreMASE_mult_setp = 0
    self.testScoreRMSE_mult_setp = 0
    self.testScoreMAPE_mult_setp = 0

    # Prediction containers, filled after fitting.
    self.trainPredict = []
    self.testPredict = []
    self.predict_horizon = []
    self.trainPredict_Result = []
    self.testPredict_Result = []
    self.predict_horizon_Result = []
    self._runTime = None

    # Provisional split with time delay 1; the grid search may update
    # self.__time_delay, after which the split is redone below.
    self.__train, self.__test = Util_NN.split_train_and_test(
        self.__dataset, proportion, 1, self.__test_observations)

    # Grid search for the best neuron amount. Use +inf as the initial
    # "worst error" sentinel instead of an arbitrary large magic number.
    self.__best_neuron_amount = 1
    self.__best_error = float('inf')
    self._runTimeOpt = obj.ModelsRunTime('ANN-OPT')
    startTime = dt.datetime.now()
    self.__grid_search_optimization_function()
    self._runTimeOpt.setTrainingTime(dt.datetime.now() - startTime)
    # NOTE(review): mirrors training time as test time; the original code
    # marked this line "remove after tests" — confirm whether it should stay.
    self._runTimeOpt.setTestTime(self._runTimeOpt.getTrainingTime())

    # Final split using the (possibly updated) time delay, then the
    # lagged input/target arrays for both splits.
    self.__train, self.__test = Util_NN.split_train_and_test(
        self.__dataset, proportion, self.__time_delay, self.__test_observations)
    self.__trainX, self.__trainY = Util_NN.create_dataset(self.__train, self.__time_delay)
    self.__testX, self.__testY = Util_NN.create_dataset(self.__test, self.__time_delay)

    # 0 means "use every available test observation".
    if self.__test_observations == 0:
        self.__test_observations = len(self.__testY)
def ARFit(self):
    """Fit an autoregressive (AR) model, evaluate it and register the results."""
    model_name = 'AR'
    run_time = obj.ModelsRunTime(model_name)

    # Step 1: fit on the training split (timed) and collect fitted values.
    fit_start = dt.datetime.now()
    self.fittedModel = ar.AR(self.trainData).fit()
    run_time.setTrainingTime(dt.datetime.now() - fit_start)
    training_fit = pd.Series(np.ceil(self.fittedModel.fittedvalues))

    # Predict over the test window (timed).
    test_start = dt.datetime.now()
    test_predictions = pd.Series(np.ceil(self.fittedModel.predict(
        start=len(self.trainData),
        end=len(self.trainData) + len(self.testData) - 1,
        dynamic=False)))
    test_elapsed = dt.datetime.now() - test_start

    # Step 2: refit on the whole series so horizon forecasts use all data.
    self.fittedModelFinal = ar.AR(self.data).fit()
    forecasts = pd.Series(np.ceil(self.fittedModelFinal.predict(
        start=len(self.data),
        end=len(self.data) + self.horizon - 1,
        dynamic=False)))

    # Step 3: error metrics. For AR the fitted values are shorter than the
    # training data; the error helper pads the head with the real values,
    # which does not affect the evaluation metrics.
    error_objs = self.setErrorData(training_fit, test_predictions, run_time)
    run_time.setTestTime(run_time.getTestTime() + test_elapsed)
    self.runTimeList.append(run_time)
    self.setModelResults(model_name, error_objs, training_fit, test_predictions, forecasts)
def __combination_executions(self, dataset, train, test, train_aux, executions=100, neurons=10, c=1, function='sigmoid', time_delay=1):
    """Train ``executions`` ELMs and combine their predictions by the median.

    Parameters
    ----------
    dataset : full series, seed for the horizon forecasts.
    train, test : ELM-formatted train/test matrices.
    train_aux : series used as seed for the multi-step test forecasts.
    executions : number of independent ELM runs to combine.
    neurons, c, function : ELM hyper-parameters (hidden units,
        regularization constant, activation function).
    time_delay : number of lags fed to the network.

    Returns
    -------
    Four lists: median train predictions, one-step test predictions,
    multi-step test predictions and horizon forecasts.
    """
    all_train_predictions = []
    all_test_one_step = []
    all_test_mult_step = []
    all_horizon = []

    train_time = dt.timedelta(0)  # cumulative training time
    test_time = dt.timedelta(0)   # cumulative test time

    for _ in range(executions):
        # A fresh, independently initialized ELM per execution.
        self.__model = self.__creat_model(neurons, function, c)

        start = dt.datetime.now()
        tr_result = self.__model.train(train)
        train_time += dt.datetime.now() - start

        start = dt.datetime.now()
        te_result = self.__model.test(test)
        test_time += dt.datetime.now() - start

        _, horizon_y = self.__forecating(self.__horizion, time_delay, dataset, self.__model)
        _, test_y = self.__forecating(len(test), time_delay, train_aux, self.__model)

        all_train_predictions.append(tr_result.predicted_targets)
        all_test_one_step.append(te_result.predicted_targets)
        all_test_mult_step.append(test_y)
        all_horizon.append(horizon_y)

    def column_medians(predictions):
        # Median across executions for each time step. Vectorized with one
        # np.median call instead of rebuilding the array and running
        # statistics.median per column (same values, far less work); also
        # avoids the original's shadowing of the parameter ``c``.
        return list(np.median(np.array(predictions), axis=0))

    # Median combination. Train medians count as training time and the
    # multi-step test medians as test time, matching the original timing;
    # one-step and horizon medians were never timed.
    start = dt.datetime.now()
    median_train = column_medians(all_train_predictions)
    train_time += dt.datetime.now() - start
    self._runTime = obj.ModelsRunTime('ELM')
    self._runTime.setTrainingTime(train_time)

    median_test_one_step = column_medians(all_test_one_step)

    start = dt.datetime.now()
    median_test_mult_step = column_medians(all_test_mult_step)
    test_time += dt.datetime.now() - start
    self._runTime.setTestTime(test_time)

    median_horizon = column_medians(all_horizon)

    return median_train, median_test_one_step, median_test_mult_step, median_horizon
def combinationFit(self, model):
    """Linear combination of the fitted base models.

    Each base model contributes one row to the training/test/forecast
    matrices (one column per time step). The combination is performed
    column-wise: either a trimmed mean (legacy 'trimmed' mode) or the
    product of a 1xN weight vector with the matrix.
    """
    run_time = obj.ModelsRunTime(model)
    training_total = dt.timedelta(0)
    test_total = dt.timedelta(0)

    # Step 1: weights — uniform for CFM, criterion-based otherwise.
    if model == 'CFM':
        weights = np.full(self.numModels, 1 / self.numModels)
    else:
        weights = self.setWeights()

    train_indexes = self.getMinTrainIndex()
    training_size = len(train_indexes)
    test_size = len(self.testData)

    # Same indexes for all models, including the combination itself.
    training_idx = train_indexes
    test_idx = self.testData.index
    forecast_idx = self.modelsResult[0].forecastDemand.index

    # Step 2: stack each base model's values; combination models are skipped.
    training_matrix = np.array([])
    test_matrix = np.array([])
    forecast_matrix = np.array([])
    for result in self.modelsResult:
        if result.model in ['CFE', 'CFM']:
            continue
        started = dt.datetime.now()
        training_matrix = np.append(training_matrix, result.trainingPrediction[train_indexes].values)
        training_total += dt.datetime.now() - started
        started = dt.datetime.now()
        test_matrix = np.append(test_matrix, result.testPrediction.values)
        test_total += dt.datetime.now() - started
        forecast_matrix = np.append(forecast_matrix, result.forecastDemand.values)

    # Reshape the flat buffers into (numModels x time) matrices.
    started = dt.datetime.now()
    training_matrix = np.reshape(training_matrix, [self.numModels, training_size])
    training_total += dt.datetime.now() - started
    started = dt.datetime.now()
    test_matrix = np.reshape(test_matrix, [self.numModels, test_size])
    test_total += dt.datetime.now() - started
    forecast_matrix = np.reshape(forecast_matrix, [self.numModels, self.horizon])

    # Step 3: combine column-wise.
    if self.combType == 'trimmed':
        # Legacy mode: drop min/max per column before averaging.
        training_fit = ModelsManager.makeTrimmedMean(training_matrix)
        test_predictions = ModelsManager.makeTrimmedMean(test_matrix)
        forecasts = ModelsManager.makeTrimmedMean(forecast_matrix)
    else:
        started = dt.datetime.now()
        training_fit = np.matmul(weights, training_matrix)
        training_total += dt.datetime.now() - started
        started = dt.datetime.now()
        test_predictions = np.matmul(weights, test_matrix)
        test_total += dt.datetime.now() - started
        forecasts = np.matmul(weights, forecast_matrix)

    training_fit = pd.Series(np.ceil(training_fit), training_idx)
    test_predictions = pd.Series(np.ceil(test_predictions), test_idx)
    forecasts = pd.Series(np.ceil(forecasts), forecast_idx)

    error_objs = self.setErrorData(training_fit, test_predictions, run_time)

    # The combination inherits the run time of every base model it uses.
    test_total += run_time.getTestTime()
    for timing in self.runTimeList:
        if timing.getModelName() not in ['ANN-OPT', 'ELM-OPT', 'CFM', 'CFE']:
            test_total += timing.getTestTime()
            training_total += timing.getTrainingTime()
    run_time.setTestTime(test_total)
    run_time.setTrainingTime(training_total)
    self.runTimeList.append(run_time)

    # Register the combination in the ModelsResult list.
    self.setModelResults(model, error_objs, training_fit, test_predictions, forecasts)
def exponentialFit(self, name):
    """Fit an exponential-smoothing family model, evaluate and register it.

    Parameters
    ----------
    name : one of 'NAIVE' (SES with alpha fixed at 1), 'SES' (grid-searched
        simple exponential smoothing) or 'HOLT' (Holt's linear trend).

    Raises
    ------
    ValueError
        If ``name`` is not a supported model. (The original if/elif chain
        silently fell through and reused a stale ``self.fittedModel``.)
    """
    # Dispatch table: model class plus the kwargs passed to .fit().
    configs = {
        'NAIVE': (ts.ExponentialSmoothing, {'smoothing_level': 1}),
        'SES': (ts.SimpleExpSmoothing, {'optimized': True, 'use_brute': True}),
        'HOLT': (ts.Holt, {'optimized': True, 'use_brute': True}),
    }
    if name not in configs:
        raise ValueError("Unsupported exponential model: %r" % (name,))
    model_cls, fit_kwargs = configs[name]

    run_time = obj.ModelsRunTime(name)

    # Step 1: fit on the training split (timed) for evaluation, and on the
    # full series (untimed) for the real forecasts.
    start = dt.datetime.now()
    self.fittedModel = model_cls(self.trainData).fit(**fit_kwargs)
    run_time.setTrainingTime(dt.datetime.now() - start)
    self.fittedModelFinal = model_cls(self.data).fit(**fit_kwargs)

    # Step 2: fitted values, test predictions (timed) and horizon forecasts.
    training_fit = pd.Series(np.ceil(self.fittedModel.fittedvalues))
    start = dt.datetime.now()
    test_predictions = pd.Series(np.ceil(self.fittedModel.forecast(len(self.testData))))
    test_elapsed = dt.datetime.now() - start
    forecasts = pd.Series(np.ceil(self.fittedModelFinal.forecast(self.horizon)))

    # Step 3: error metrics, run times and result registration.
    error_objs = self.setErrorData(training_fit, test_predictions, run_time)
    run_time.setTestTime(run_time.getTestTime() + test_elapsed)
    self.runTimeList.append(run_time)
    self.setModelResults(name, error_objs, training_fit, test_predictions, forecasts)
def predictions(self, executions=20):
    """Train the ANN ``executions`` times and combine the runs by the median.

    Fills the train/test/horizon prediction attributes, the multi-step
    error scores and the formatted result series. Test predictions are
    multi-step (recursive) forecasts seeded from the training data.

    Parameters
    ----------
    executions : number of independent training runs to combine.
    """
    all_train = []
    all_test_mult_step = []
    all_horizon = []

    train_time = dt.timedelta(0)  # cumulative training time
    test_time = dt.timedelta(0)   # cumulative test time

    for _ in range(executions):
        # Train a fresh network and collect its in-sample predictions.
        start = dt.datetime.now()
        self.__training_model(self.__time_delay, self.__best_neuron_amount,
                              self.__trainX, self.__trainY)
        self.trainPredict = Util_NN.convert_column_in_row(self.__model.predict(self.__trainX))
        train_time += dt.datetime.now() - start

        # Multi-step forecasts over the test window (timed) and horizon.
        start = dt.datetime.now()
        _, test_y = self.__forecating(self.__test_observations, self.__time_delay,
                                      self.__train, self.__model)
        test_time += dt.datetime.now() - start
        _, horizon_y = self.__forecating(self.__horizion, self.__time_delay,
                                         self.__dataset, self.__model)

        all_train.append(self.trainPredict)
        all_test_mult_step.append(test_y)
        all_horizon.append(horizon_y)

    def column_medians(runs):
        # Median across runs for each time step. Vectorized with one
        # np.median call instead of rebuilding the array and running
        # statistics.median per column (same values, far less work).
        return list(np.median(np.array(runs), axis=0))

    # Median combination. Train medians count as training time and the
    # multi-step test medians as test time, matching the original timing.
    start = dt.datetime.now()
    median_train = column_medians(all_train)
    train_time += dt.datetime.now() - start
    self._runTime = obj.ModelsRunTime('ANN')
    self._runTime.setTrainingTime(train_time)

    start = dt.datetime.now()
    median_test_mult_step = column_medians(all_test_mult_step)
    test_time += dt.datetime.now() - start
    self._runTime.setTestTime(test_time)

    median_horizon = column_medians(all_horizon)

    # Publish the combined predictions (testPredict holds the multi-step
    # forecasts, as in the original).
    self.trainPredict = median_train
    self.testPredict = median_test_mult_step
    self.predict_horizon = median_horizon

    # Multi-step evaluation metrics.
    (self.trainScoreMASE, self.trainScoreRMSE, self.trainScoreMAPE,
     self.testScoreMASE_mult_setp, self.testScoreRMSE_mult_setp,
     self.testScoreMAPE_mult_setp) = Util_NN.evaluate_all_errors(
        self.__trainY, self.trainPredict, self.__testY,
        median_test_mult_step, self.__train)

    # Formatted output series aligned with the original index.
    self.trainPredict_Result, self.testPredict_Result, self.predict_horizon_Result = \
        Util_NN.treat_output(self.__dataset, self.__index, self.trainPredict,
                             self.testPredict, self.predict_horizon,
                             self.__horizion, self.__time_delay)