def performEvaluations(filename, train_window=3000, overload_dur=5, overload_percentile=70, steps=30):
    """Evaluate stored forecasts for one trace file against its true values.

    Args:
        filename: name of the per-trace CSV file (same name under the
            forecast directory and the true-value directory).
        train_window: number of leading samples used for training; metrics
            are computed only on the samples after this window.
        overload_dur: currently unused; kept for interface compatibility.
        overload_percentile: percentile of the true values used as the
            overload threshold.
        steps: number of forecast columns read per CSV row.

    Returns:
        Flat list of metrics: RMSE, then the upper/lower-accuracy,
        per-sample-accuracy and overload-state-accuracy values.
    """
    cur_results = []
    # NaNs in the stored forecasts become 0 so downstream metrics stay finite.
    forecasts = np.nan_to_num(
        np.genfromtxt("d:/data/" + TYPE + "_" + METHOD + "/" + filename, delimiter=',',
                      usecols=range(0, steps))).ravel()  # was hard-coded 30; now honors `steps`
    truevals = np.genfromtxt("d:/data/" + TYPE + "/" + filename, delimiter=',',
                             skip_header=1)[:train_window + len(forecasts), 1]
    # Threshold is taken over the whole trace (training + forecast portion).
    threshold = np.percentile(truevals, overload_percentile)
    actual = truevals[train_window:]
    cur_results.append(eval.calc_RMSE(actual, forecasts))
    cur_results.extend(eval.calc_upper_lower_acc(actual, forecasts))
    cur_results.extend(eval.calc_persample_accuracy(actual, forecasts, threshold))
    cur_results.extend(eval.calc_overload_states_acc(actual, forecasts, threshold))
    return cur_results
def trainFunc(params):
    """Train a recurrent (Elman-style) network for one hyper-parameter setting.

    params is a tuple: (iter, trainds, validds, input_size, hidden, func,
    eta, lmda, epochs) where func is the hidden-layer module class, eta the
    learning rate and lmda the weight-decay coefficient.

    Returns (validation RMSE, explained-variance score, trained network).
    """
    iter, trainds, validds, input_size, hidden, func, eta, lmda, epochs = params
    print('Iter:', iter, 'Epochs:', epochs, 'Hidden_size:', hidden, 'Eta:', eta, 'Lamda:', lmda, 'Activation:', func)
    # Build network
    n = RecurrentNetwork()
    n.addInputModule(LinearLayer(input_size, name='in'))
    n.addModule(func(hidden, name='hidden'))
    n.addModule(LinearLayer(hidden, name='context'))
    n.addOutputModule(LinearLayer(1, name='out'))
    n.addConnection(FullConnection(n['in'], n['hidden'], name='in_to_hidden'))
    n.addConnection(FullConnection(n['hidden'], n['out'], name='hidden_to_out'))
    # NOTE(review): the recurrent link feeds hidden -> context, but nothing
    # leads back out of 'context', so the recurrence cannot influence the
    # output. Confirm whether a context -> hidden connection was intended
    # (classic Elman topology).
    n.addRecurrentConnection(FullConnection(n['hidden'], n['context']))
    rnet = n
    rnet.sortModules()
    trainer = BackpropTrainer(n, trainds, learningrate=eta, weightdecay=lmda, momentum=0.1, shuffle=False)
    trainer.trainEpochs(epochs)
    # Zero out NaN activations (diverged training) before scoring.
    pred = np.nan_to_num(n.activateOnDataset(validds))
    validerr = eval.calc_RMSE(validds['target'], pred)
    varscore = explained_variance_score(validds['target'], pred)
    return validerr, varscore, n
def performEvaluations(params, train_window = 3000, overload_dur = 5, overload_percentile = 70, predic_window=30): filename, METHOD, TYPE, OUTPUT, INPUT = params[:5] filename = filename.split('/')[-1] print OUTPUT+TYPE+"_"+METHOD+"/" + filename, "started..." cur_results = [] forecasts = np.nan_to_num(np.genfromtxt(OUTPUT+TYPE+"_"+METHOD+"/" + filename, delimiter=',',usecols=range(0,predic_window))).ravel() # ,usecols=range(0,30) if TYPE == 'pageviews' or TYPE == 'network': filename = filename.replace(".csv","") truevals = np.genfromtxt(INPUT+TYPE+"/"+filename)[:train_window+len(forecasts)] truevals = truevals/np.max(truevals) else: truevals = np.genfromtxt(INPUT+TYPE+"/"+filename, delimiter=',',skip_header=1)[:train_window+len(forecasts),1] # Normalize # truevals = np.divide(truevals, np.max(truevals)) threshold = np.percentile(truevals, overload_percentile) cur_results.append(eval.calc_RMSE(truevals[train_window:], forecasts)) for val in eval.calc_upper_lower_acc(truevals[train_window:], forecasts): cur_results.append(val) for val in eval.calc_persample_accuracy(truevals[train_window:], forecasts, threshold): cur_results.append(val) for val in eval.calc_overload_states_acc(truevals[train_window:], forecasts, threshold): cur_results.append(val) return cur_results
def performEvaluations(filename, train_window=3000, overload_dur=5, overload_percentile=70, steps=30):
    """Evaluate stored CPU-rate forecasts for one trace against ground truth.

    Returns a flat metric list: RMSE, upper/lower accuracy, per-sample
    accuracy and overload-state accuracy.
    """
    forecasts = np.genfromtxt("d:/data/cpu_norm_forecasts/" + filename,
                              delimiter=',', usecols=range(0, steps)).ravel()
    # True values are sliced to align exactly with the forecast horizon.
    truevals = np.genfromtxt("d:/data/cpuRate/" + filename, delimiter=',',
                             skip_header=1)[train_window:train_window + len(forecasts), 1]
    threshold = np.percentile(truevals, overload_percentile)
    metrics = [eval.calc_RMSE(truevals, forecasts)]
    metrics.extend(eval.calc_upper_lower_acc(truevals, forecasts))
    metrics.extend(eval.calc_persample_accuracy(truevals, forecasts, threshold))
    metrics.extend(eval.calc_overload_states_acc(truevals, forecasts, threshold))
    return metrics
def trainFunc(params):
    """Train one feed-forward network configuration and score it on validation data.

    Returns a (validation RMSE, explained variance, trained network) triple.
    """
    idx, train_ds, valid_ds, n_in, n_hidden, act, lr, decay, n_epochs = params
    print("Iter:", idx, "Epochs:", n_epochs, "Hidden_size:", n_hidden,
          "Eta:", lr, "Lamda:", decay, "Activation:", act)
    model = buildNetwork(n_in, n_hidden, 1, bias=True)
    bp = BackpropTrainer(model, train_ds, learningrate=lr, weightdecay=decay,
                         momentum=0.1, shuffle=False)
    bp.trainEpochs(n_epochs)
    # Guard against NaN activations from diverged training before scoring.
    outputs = np.nan_to_num(model.activateOnDataset(valid_ds))
    rmse = eval.calc_RMSE(valid_ds["target"], outputs)
    evs = explained_variance_score(valid_ds["target"], outputs)
    return rmse, evs, model
def trainFunc(params):
    """Train a network produced by buildNet for one hyper-parameter setting.

    Returns (validation RMSE, explained-variance score, trained network).
    """
    idx, train_ds, valid_ds, n_in, n_hidden, act, lr, decay, n_epochs = params
    print('Iter:', idx, 'Epochs:', n_epochs, 'Hidden_size:', n_hidden,
          'Eta:', lr, 'Lamda:', decay, 'Activation:', act)
    model = buildNet(n_in, n_hidden)
    bp = BackpropTrainer(model, train_ds, learningrate=lr, weightdecay=decay,
                         momentum=0.1, shuffle=False)
    bp.trainEpochs(n_epochs)
    # Zero out NaN predictions (diverged training) before scoring.
    outputs = np.nan_to_num(model.activateOnDataset(valid_ds))
    rmse = eval.calc_RMSE(valid_ds['target'], outputs)
    evs = explained_variance_score(valid_ds['target'], outputs)
    return rmse, evs, model
def performEvaluations(filename, train_window = 3000, overload_dur = 5, overload_percentile = 70, steps=30):
    """Evaluate stored Wikipage forecasts against normalized true page views.

    Reads the TYPE_METHOD forecast CSV and the single-column true-value file
    (scaled by its maximum), then returns [RMSE] followed by the
    upper/lower-accuracy values for the post-training portion of the trace.
    `overload_dur` and `overload_percentile` are unused here; kept for
    interface compatibility with the other evaluation variants.
    """
    cur_results = []
    # (Removed a stray no-op `filename` expression statement.)
    forecasts = np.nan_to_num(
        np.genfromtxt("d:/Wikipage data/"+TYPE+"_"+METHOD+"/" + filename,
                      delimiter=',', usecols=range(0, steps))).ravel()  # was hard-coded 30
    truevals = np.genfromtxt("d:/Wikipage data/"+TYPE+"/"+filename)[:train_window+len(forecasts)]
    truevals = truevals/np.max(truevals)  # normalize by the trace maximum
    cur_results.append(eval.calc_RMSE(truevals[train_window:], forecasts))
    cur_results.extend(eval.calc_upper_lower_acc(truevals[train_window:], forecasts))
    return cur_results
def performEvaluations(filename, train_window = 3000, overload_dur = 5, overload_percentile = 70, steps=30):
    """Evaluate stored AR disk-I/O forecasts for one trace against ground truth.

    Returns a flat metric list: RMSE, upper/lower accuracy, per-sample
    accuracy and overload-state accuracy.
    """
    forecasts = np.genfromtxt("d:/data/diskio_ar_forecasts/" + filename,
                              delimiter=',', usecols=range(0, steps)).ravel()
    # Slice the true values so they line up with the forecast horizon.
    truevals = np.genfromtxt("d:/data/diskio/" + filename, delimiter=',',
                             skip_header=1)[train_window:train_window + len(forecasts), 1]
    threshold = np.percentile(truevals, overload_percentile)
    metrics = [eval.calc_RMSE(truevals, forecasts)]
    metrics.extend(eval.calc_upper_lower_acc(truevals, forecasts))
    metrics.extend(eval.calc_persample_accuracy(truevals, forecasts, threshold))
    metrics.extend(eval.calc_overload_states_acc(truevals, forecasts, threshold))
    return metrics
def trainFunc(params):
    """Train one feed-forward configuration; return (RMSE, explained variance, net)."""
    it, tr_set, va_set, n_inputs, n_hidden, activation, rate, weight_decay, n_ep = params
    print('Iter:', it, 'Epochs:', n_ep, 'Hidden_size:', n_hidden,
          'Eta:', rate, 'Lamda:', weight_decay, 'Activation:', activation)
    network = buildNetwork(n_inputs, n_hidden, 1, bias=True)
    BackpropTrainer(network, tr_set, learningrate=rate, weightdecay=weight_decay,
                    momentum=0.1, shuffle=False).trainEpochs(n_ep)
    # Diverged training can yield NaN activations; zero them before scoring.
    predictions = np.nan_to_num(network.activateOnDataset(va_set))
    err = eval.calc_RMSE(va_set['target'], predictions)
    variance = explained_variance_score(va_set['target'], predictions)
    return err, variance, network
def performEvaluations(filename, train_window = 3000, overload_dur = 5, overload_percentile = 70, steps=30):
    """Evaluate stored forecasts for one trace against its true values.

    Args:
        filename: per-trace CSV name, shared by the forecast and true-value dirs.
        train_window: leading samples used for training; metrics cover only
            the samples after this window.
        overload_dur: currently unused; kept for interface compatibility.
        overload_percentile: percentile of the true values that defines the
            overload threshold.
        steps: number of forecast columns read per CSV row.

    Returns:
        Flat list: RMSE, then upper/lower-, per-sample- and
        overload-state-accuracy values.
    """
    cur_results = []
    # NaNs in the stored forecasts become 0 so the metrics stay finite.
    forecasts = np.nan_to_num(
        np.genfromtxt("d:/data/"+TYPE+"_"+METHOD+"/" + filename, delimiter=',',
                      usecols=range(0, steps))).ravel()  # was hard-coded 30; now honors `steps`
    truevals = np.genfromtxt("d:/data/"+TYPE+"/"+filename, delimiter=',',
                             skip_header=1)[:train_window+len(forecasts), 1]
    # Threshold is computed over the whole trace (training + forecast part).
    threshold = np.percentile(truevals, overload_percentile)
    actual = truevals[train_window:]
    cur_results.append(eval.calc_RMSE(actual, forecasts))
    cur_results.extend(eval.calc_upper_lower_acc(actual, forecasts))
    cur_results.extend(eval.calc_persample_accuracy(actual, forecasts, threshold))
    cur_results.extend(eval.calc_overload_states_acc(actual, forecasts, threshold))
    return cur_results
def trainFunc(params):
    """Train a recurrent (Elman-style) network for one hyper-parameter setting.

    params is a tuple: (iter, trainds, validds, input_size, hidden, func,
    eta, lmda, epochs) where func is the hidden-layer module class, eta the
    learning rate and lmda the weight-decay coefficient.

    Returns (validation RMSE, explained-variance score, trained network).
    """
    iter, trainds, validds, input_size, hidden, func, eta, lmda, epochs = params
    print('Iter:', iter, 'Epochs:', epochs, 'Hidden_size:', hidden, 'Eta:', eta, 'Lamda:', lmda, 'Activation:', func)
    # Build network
    n = RecurrentNetwork()
    n.addInputModule(LinearLayer(input_size, name = 'in'))
    n.addModule(func(hidden, name = 'hidden'))
    n.addModule(LinearLayer(hidden, name = 'context'))
    n.addOutputModule(LinearLayer(1, name = 'out'))
    n.addConnection(FullConnection(n['in'], n['hidden'], name = 'in_to_hidden'))
    n.addConnection(FullConnection(n['hidden'], n['out'], name = 'hidden_to_out'))
    # NOTE(review): the recurrent link feeds hidden -> context, but nothing
    # leads back out of 'context', so the recurrence cannot influence the
    # output. Confirm whether a context -> hidden connection was intended
    # (classic Elman topology).
    n.addRecurrentConnection(FullConnection(n['hidden'], n['context']))
    rnet = n
    rnet.sortModules()
    trainer = BackpropTrainer(n, trainds, learningrate=eta, weightdecay=lmda, momentum=0.1, shuffle=False)
    trainer.trainEpochs(epochs)
    # Zero out NaN activations (diverged training) before scoring.
    pred = np.nan_to_num(n.activateOnDataset(validds))
    validerr = eval.calc_RMSE(validds['target'], pred)
    varscore = explained_variance_score(validds['target'], pred)
    return validerr, varscore, n
def performEvaluations(filename, train_window=3000, overload_dur=5, overload_percentile=70, steps=30):
    """Evaluate stored Wikipage forecasts against normalized true page views.

    Reads the TYPE_METHOD forecast CSV and the single-column true-value file
    (scaled by its maximum), then returns [RMSE] followed by the
    upper/lower-accuracy values for the post-training portion of the trace.
    `overload_dur` and `overload_percentile` are unused here; kept for
    interface compatibility with the other evaluation variants.
    """
    cur_results = []
    # (Removed a stray no-op `filename` expression statement.)
    forecasts = np.nan_to_num(
        np.genfromtxt("d:/Wikipage data/" + TYPE + "_" + METHOD + "/" + filename,
                      delimiter=',', usecols=range(0, steps))).ravel()  # was hard-coded 30
    truevals = np.genfromtxt("d:/Wikipage data/" + TYPE + "/" + filename)[:train_window + len(forecasts)]
    truevals = truevals / np.max(truevals)  # normalize by the trace maximum
    cur_results.append(eval.calc_RMSE(truevals[train_window:], forecasts))
    cur_results.extend(eval.calc_upper_lower_acc(truevals[train_window:], forecasts))
    return cur_results
def ensembleModel(params, types=['ma','ar','fnn','agile'], step=30, input_window=3000): input_size = len(types) filename, METHOD, TYPE, OUTPUT = params[0:4] filename = filename.split('/')[-1] filename, METHOD, TYPE, OUTPUT = params[0:4] filename = filename.split('/')[-1] combine_model = np.genfromtxt(OUTPUT+TYPE+"_"+types[0]+"/"+filename, delimiter=',', usecols=range(0,30)).ravel() truevals = np.nan_to_num(np.genfromtxt(OUTPUT+TYPE+"/"+filename, delimiter=',',skip_header=1, usecols=(1))[:input_window+len(combine_model)]) for t in types[1:]: forecasts = np.genfromtxt(OUTPUT+TYPE+"_"+t+"/"+filename, delimiter=',', usecols=range(0,30)).ravel() combine_model = np.vstack((combine_model, forecasts)) average_fc = np.average(combine_model, axis=0) if METHOD == 'avg4': fileutils.writeCSV(OUTPUT+TYPE+"_"+METHOD+"/"+filename, np.atleast_2d(average_fc).reshape([178,30])) print filename, "complete" return if METHOD == 'combo4' or METHOD =='wa': training = SupervisedDataSet(input_size, 1) for i in range(step): training.appendLinked([combine_model[t][i] for t in range(input_size)], truevals[i+input_window]) besterr = eval.calc_RMSE(truevals[input_window:input_window+step], average_fc[:step]) bestNet = None for i in range(50): if METHOD == 'wa': net = buildNetwork(input_size, 1, hiddenclass=LinearLayer, bias=False) else: net = buildNetwork(input_size, 2, 1, hiddenclass=LinearLayer, bias=False) trainer = BackpropTrainer(net, training, learningrate=0.001, shuffle=False) trainer.trainEpochs(100) err = eval.calc_RMSE(truevals[input_window:input_window+step], net.activateOnDataset(training)) if err < besterr: bestNet = net break combo_fc = average_fc[0:step].tolist() # combo_fc = [] if bestNet == None: combo_fc = average_fc else: for i in range(step, len(combine_model[0]), step): training = SupervisedDataSet(input_size, 1) for j in range(i,i+step): combo_fc.append(bestNet.activate([combine_model[t][j] for t in range(input_size)])[0]) training.appendLinked([combine_model[t][j] for t in 
range(input_size)], truevals[j+input_window]) trainer = BackpropTrainer(bestNet, training, learningrate=0.01, shuffle=False) trainer.trainEpochs(2) result = np.atleast_2d(combo_fc).reshape([178,30]) minimum = np.percentile(truevals,5) result[0,result[0,:] < minimum] = minimum fileutils.writeCSV(OUTPUT+TYPE+"_"+METHOD+"/"+filename, result) print filename, "complete"