def performsSlidingWindowForecast(filename, minpercentile=5, step=30, input_window=3000, predic_window=30): ''' Input window = 250 hours = 250*12 = 3000 look ahead window 60 samples = 5 hours = 720min/5 = 60 ''' data = np.genfromtxt(filename) data = data/np.max(data) minimum = np.percentile(data,minpercentile) N = len(data) result = [] print filename, "started..." y = data[0:input_window].tolist() model = HW_model.HW_model(y, minimum, 'additive') for strIndex in range(0,N-input_window - predic_window, step): if strIndex == 0: model.fit() else: y = data[strIndex:strIndex+input_window].tolist() model.update(y) y_pred = model.predict(fc=predic_window) result.append(y_pred) f = filename.split('/')[-1] fileutils.writeCSV("D:/Wikipage data/pageviews_hw/"+f, np.atleast_2d(result)) print filename, "complete!"
def performsSlidingWindowForecast(filename_eta_lmda, minpercentile=5, step=30, input_window=3000, predic_window=30): ''' Input window = 250 hours = 250*12 = 3000 look ahead window 60 samples = 5 hours = 720min/5 = 60 ''' filename, curEta, curLmda = filename_eta_lmda data = np.genfromtxt(filename, delimiter=',', skip_header=1, usecols=(1)) minimum = np.percentile(data,minpercentile) N = len(data) print filename, "started..." curMachine = filename.split('/')[-1] model = Fnn_model.Fnn_model(data=data, machineID = curMachine,netPath='../data/cpu2_networks/'+curMachine.replace(".csv",".xml"), eta=curEta, lmda=curLmda) model.fit() pred = [] # lastFc = 0 for p in range(input_window, len(data)-predic_window,predic_window): fc = np.array(model.predict(predic_window)).flatten() # fc[-1] = lastFc # lastFc = fc[0] fc[fc<0] = minimum pred.append(fc) model.update() pred = np.array(pred) f = filename.split('/')[-1] fileutils.writeCSV("d:/data/cpu2_fnn/"+f, np.atleast_2d(pred)) print filename, "complete!"
def performsSlidingWindowForecast(filename, minpercentile=5, step=30, input_window=3000, predic_window=30): ''' Input window = 250 hours = 250*12 = 3000 look ahead window 60 samples = 5 hours = 720min/5 = 60 ''' data = np.nan_to_num(np.genfromtxt(filename, delimiter=',', skip_header=1)) minimum = np.percentile(data[:, 1], minpercentile) N = len(data[:, 1]) result = [] print filename, "started..." for strIndex in range(0, N - input_window - predic_window, step): if strIndex == 0: y = data[:input_window, 1] model = Norm_model.Norm_model(y) model.fit() else: y = data[strIndex:strIndex + input_window, 1] model.update(y) y_pred = model.predict(predic_window) y_pred[y_pred < 0] = minimum result.append(y_pred) f = filename.split('/')[-1] fileutils.writeCSV("d:/data/cpu_norm_forecasts/" + f, np.atleast_2d(result)) print filename, "complete!"
def extractMachineData(): datafiles = fileutils.getFilelist("D:/googleClusterData/clusterdata-2011-1/task_usage") machineUsage = {} startAt = 0; for machine in machines: machineUsage[machine] = [] for datafile in datafiles[startAt:]: print datafile for row in fileutils.getCsvRows(datafile): curMachine = row[4] if curMachine in machines: machineUsage[curMachine].append(row) for machine in machineUsage.keys(): if startAt == 0: fileutils.writeCSV("d:/data/perMachine5/"+machine+".csv", machineUsage[machine]) else: fileutils.writeCSV("d:/data/perMachine5/"+machine+".csv", machineUsage[machine], mode='ab') startAt += 1 machineUsage.clear() for machine in machines: machineUsage[machine] = []
def performsSlidingWindowForecast(filename, minpercentile=5, step=30, input_window=3000, predic_window=30): ''' Input window = 250 hours = 250*12 = 3000 look ahead window 60 samples = 5 hours = 720min/5 = 60 ''' #Wikidata # data = np.genfromtxt(filename) # data = data/np.max(data) data = np.nan_to_num(np.genfromtxt(filename, delimiter=',', skip_header=1)[:,1]).ravel() minimum = np.percentile(data,minpercentile) N = len(data) result = [] print filename, "started..." for strIndex in range(0,N-input_window - predic_window, step): if strIndex == 0: y = data[:input_window] model = AR_model.AR_model(y, order=30) model.fit() else: y = data[strIndex:strIndex+input_window] model.update(y) y_pred = model.predict(predic_window) y_pred[y_pred[:,0]<0,0] = minimum result.append(y_pred[:,0]) f = filename.split('/')[-1] fileutils.writeCSV("d:/data/"+TYPE+"_ar/"+f, np.atleast_2d(result)) print filename, "complete!"
def performsSlidingWindowForecast(filename, minpercentile=5, step=30, input_window=3000, predic_window=30): ''' Input window = 250 hours = 250*12 = 3000 look ahead window 60 samples = 5 hours = 720min/5 = 60 ''' # Wikidata # data = np.genfromtxt(filename) # data = data/np.max(data) data = np.nan_to_num(np.genfromtxt(filename, delimiter=',', skip_header=1)[:,1]).ravel() minimum = np.percentile(data,minpercentile) N = len(data) result = [] max = np.max(data) print filename, "started..." for strIndex in range(0,N-input_window - predic_window, step): if strIndex == 0: y = data[:input_window] model = Press_model.Press_model(y, maximum=max) model.fit() else: # y = data[strIndex:strIndex+input_window,1] y = data[input_window + strIndex - step:input_window + strIndex] model.update(y) y_pred = model.predict(predic_window) y_pred[y_pred<0] = minimum result.append(y_pred) res = np.zeros((len(result),predic_window)) for i in range(len(result)): res[i,:len(result[i])] = result[i] f = filename.split('/')[-1] fileutils.writeCSV("d:/data/"+TYPE+"_press/"+f, np.atleast_2d(res)) print filename, "complete!"
def performsSlidingWindowForecast(filename_eta_lmda, minpercentile=5, step=30, input_window=3000, predic_window=30): ''' Input window = 250 hours = 250*12 = 3000 look ahead window 60 samples = 5 hours = 720min/5 = 60 ''' filename, curEta, curLmda = filename_eta_lmda data = np.genfromtxt(filename, delimiter=',', skip_header=1, usecols=(1)) minimum = np.percentile(data,minpercentile) N = len(data) print filename, "started..." curMachine = filename.split('/')[-1][:-4] # curMachine = "cpu" + filename.split('/')[-1].strip('.csv') model = Rnn_model.Rnn_model(data=data, machineID = curMachine, eta=curEta, lmda=curLmda) model.fit() pred = [] lastFc = None for p in range(input_window, len(data)-predic_window,predic_window): fc = model.predict(predic_window) if lastFc is not None: fc[0] = lastFc lastFc = fc[-1] fc[fc<0] = minimum pred.append(fc) model.update() pred = np.array(pred).ravel() f = filename.split('/')[-1] fileutils.writeCSV("d:/data/cpu_rnn_forecasts/"+f, np.atleast_2d(pred)) print filename, "complete!"
def performsSlidingWindowForecast(filename, minpercentile=5, step=30, input_window=3000, predic_window=30): ''' Input window = 250 hours = 250*12 = 3000 look ahead window 60 samples = 5 hours = 720min/5 = 60 ''' data = np.genfromtxt(filename) data = data / np.max(data) minimum = np.percentile(data, minpercentile) N = len(data) result = [] print filename, "started..." y = data[0:input_window].tolist() model = HW_model.HW_model(y, minimum, 'additive') for strIndex in range(0, N - input_window - predic_window, step): if strIndex == 0: model.fit() else: y = data[strIndex:strIndex + input_window].tolist() model.update(y) y_pred = model.predict(fc=predic_window) result.append(y_pred) f = filename.split('/')[-1] fileutils.writeCSV("D:/Wikipage data/pageviews_hw/" + f, np.atleast_2d(result)) print filename, "complete!"
def extractMachineData(): datafiles = fileutils.getFilelist( "D:/googleClusterData/clusterdata-2011-1/task_usage") machineUsage = {} startAt = 0 for machine in machines: machineUsage[machine] = [] for datafile in datafiles[startAt:]: print datafile for row in fileutils.getCsvRows(datafile): curMachine = row[4] if curMachine in machines: machineUsage[curMachine].append(row) for machine in machineUsage.keys(): if startAt == 0: fileutils.writeCSV("d:/data/perMachine5/" + machine + ".csv", machineUsage[machine]) else: fileutils.writeCSV("d:/data/perMachine5/" + machine + ".csv", machineUsage[machine], mode='ab') startAt += 1 machineUsage.clear() for machine in machines: machineUsage[machine] = []
def performsSlidingWindowForecast(filename, minpercentile=5, step=30, input_window=3000, predic_window=30, order_=1): ''' Input window = 250 hours = 250*12 = 3000 look ahead window 60 samples = 5 hours = 720min/5 = 60 ''' # data = np.genfromtxt(filename) # data = data/np.max(data) data = np.nan_to_num(np.genfromtxt(filename, delimiter=',', skip_header=1)[:,1]).ravel() minimum = np.percentile(data,minpercentile) N = len(data) result = [] max = np.max(data) print filename, "started..." for strIndex in range(0,N-input_window - predic_window, step): if strIndex == 0: y = data[:input_window] model = Markov_model.Markov_model(y, maximum=max, order=order_) model.fit() else: # y = data[strIndex:strIndex+input_window,1] y = data[input_window + strIndex - step:input_window + strIndex] model.update(y) y_pred = model.predict(predic_window) y_pred[y_pred<0] = minimum result.append(y_pred.ravel()) f = filename.split('/')[-1] fileutils.writeCSV("d:/data/cpu_markov"+str(order_)+"/"+f, np.atleast_2d(result)) print filename, "complete!"
def readAndAggregate(filename, outputDir, resource='cpu'):
    """Aggregate per-task resource usage into 300e6-microsecond buckets.

    ``resource`` selects the source column ('cpu', 'memory' or 'diskIO');
    anything else falls back to column 1 (start time).
    Fix: the row count is computed as an int; the original passed a float
    to ``np.zeros``, which modern NumPy rejects.
    """
    colomn = 1  # start time (fallback)
    if resource == 'cpu':
        colomn = 5
    elif resource == 'memory':
        colomn = 7
    elif resource == 'diskIO':
        colomn = 11
    resourcePerTask = np.genfromtxt(filename, delimiter=',', skiprows=0,
                                    usecols=(0, colomn), filling_values='0')
    fileCsv = filename.split('/')[-1]
    strTime = 600e6  # trace timestamps start at 600 seconds (in microseconds)
    endTime = strTime + 300e6
    globalEndTime = 2506200000000
    numberOfRows = int(globalEndTime / 300e6)
    aggregatedData = np.zeros([numberOfRows, 2], dtype=float)
    x = 0
    aggregatedData[x, 0] = strTime
    for row in resourcePerTask[:]:
        time = np.float_(row[0])
        if (time >= strTime and time < endTime):
            aggregatedData[x, 1] += np.float_(row[1])
        else:
            # NOTE(review): a row that skips more than one bucket only
            # advances a single bucket here — assumes input is dense/sorted.
            strTime = endTime
            endTime += 300e6
            x += 1
            aggregatedData[x, 0] = strTime
    # Fill remaining (empty) buckets with their start times.
    while x < numberOfRows:
        strTime = endTime
        endTime += 300e6
        aggregatedData[x, 0] = strTime
        x += 1
    fileutils.writeCSV(outputDir + '/' + resource + '_' + fileCsv,
                       aggregatedData,
                       header=('Time', resource.capitalize()))
def readAndAggregate(filename, outputDir, resource='cpu'):
    """Aggregate per-task resource usage into 300e6-microsecond buckets.

    ``resource`` selects the source column ('cpu', 'memory' or 'diskIO');
    anything else falls back to column 1 (start time).
    Fix: the row count is computed as an int; the original passed a float
    to ``np.zeros``, which modern NumPy rejects. Stray semicolons removed.
    """
    colomn = 1  # start time (fallback)
    if resource == 'cpu':
        colomn = 5
    elif resource == 'memory':
        colomn = 7
    elif resource == 'diskIO':
        colomn = 11
    resourcePerTask = np.genfromtxt(filename, delimiter=',', skiprows=0,
                                    usecols=(0, colomn), filling_values='0')
    fileCsv = filename.split('/')[-1]
    strTime = 600e6  # trace timestamps start at 600 seconds (in microseconds)
    endTime = strTime + 300e6
    globalEndTime = 2506200000000
    numberOfRows = int(globalEndTime / 300e6)
    aggregatedData = np.zeros([numberOfRows, 2], dtype=float)
    x = 0
    aggregatedData[x, 0] = strTime
    for row in resourcePerTask[:]:
        time = np.float_(row[0])
        if (time >= strTime and time < endTime):
            aggregatedData[x, 1] += np.float_(row[1])
        else:
            # NOTE(review): a row that skips more than one bucket only
            # advances a single bucket here — assumes input is dense/sorted.
            strTime = endTime
            endTime += 300e6
            x += 1
            aggregatedData[x, 0] = strTime
    # Fill remaining (empty) buckets with their start times.
    while x < numberOfRows:
        strTime = endTime
        endTime += 300e6
        aggregatedData[x, 0] = strTime
        x += 1
    fileutils.writeCSV(outputDir + '/' + resource + '_' + fileCsv,
                       aggregatedData,
                       header=('Time', resource.capitalize()))
def performsSlidingWindowForecast(filename_eta_lmda, minpercentile=5, step=30, input_window=3000, predic_window=30): ''' Input window = 250 hours = 250*12 = 3000 look ahead window 60 samples = 5 hours = 720min/5 = 60 ''' filename, curEta, curLmda = filename_eta_lmda data = np.genfromtxt(filename, delimiter=',', skip_header=1, usecols=(1)) minimum = np.percentile(data, minpercentile) N = len(data) print filename, "started..." curMachine = filename.split('/')[-1][:-4] # curMachine = "cpu" + filename.split('/')[-1].strip('.csv') model = Rnn_model.Rnn_model(data=data, machineID=curMachine, eta=curEta, lmda=curLmda) model.fit() pred = [] lastFc = None for p in range(input_window, len(data) - predic_window, predic_window): fc = model.predict(predic_window) if lastFc is not None: fc[0] = lastFc lastFc = fc[-1] fc[fc < 0] = minimum pred.append(fc) model.update() pred = np.array(pred).ravel() f = filename.split('/')[-1] fileutils.writeCSV("d:/data/cpu_rnn_forecasts/" + f, np.atleast_2d(pred)) print filename, "complete!"
# truevals = np.divide(truevals, np.max(truevals)) threshold = np.percentile(truevals, overload_percentile) cur_results.append(eval.calc_RMSE(truevals[train_window:], forecasts)) for val in eval.calc_upper_lower_acc(truevals[train_window:], forecasts): cur_results.append(val) for val in eval.calc_persample_accuracy(truevals[train_window:], forecasts, threshold): cur_results.append(val) for val in eval.calc_overload_states_acc(truevals[train_window:], forecasts, threshold): cur_results.append(val) return cur_results if __name__ == '__main__': files = [] root = "d:/data/"+TYPE+"/" for _, _, fs in os.walk(root): for f in fs: if f.endswith(".csv"): files.append(f) pool = ThreadPool(4) # performEvaluations(files[0]) results = pool.map(performEvaluations, files) pool.close() pool.join() fileutils.writeCSV("d:/data/results/"+TYPE+"_"+METHOD+".csv", results) print METHOD+" "+ TYPE + " complete"
def performEvaluations(filename, train_window=3000, overload_dur=5, overload_percentile=70, steps=30):
    """Score the saved normal-model forecasts for one machine trace.

    Returns [RMSE, upper/lower accuracies, per-sample accuracies,
    overload-state accuracies] as a flat list.
    """
    forecasts = np.genfromtxt("d:/data/cpu_norm_forecasts/" + filename,
                              delimiter=',', usecols=range(0, steps)).ravel()
    truevals = np.genfromtxt("d:/data/cpuRate/" + filename,
                             delimiter=',', skip_header=1)[train_window:train_window + len(forecasts), 1]
    threshold = np.percentile(truevals, overload_percentile)
    scores = [eval.calc_RMSE(truevals, forecasts)]
    scores.extend(eval.calc_upper_lower_acc(truevals, forecasts))
    scores.extend(eval.calc_persample_accuracy(truevals, forecasts, threshold))
    scores.extend(eval.calc_overload_states_acc(truevals, forecasts, threshold))
    return scores


if __name__ == '__main__':
    files = []
    for _, _, fs in os.walk("d:/data/cpuRate/"):
        for f in fs:
            if f.endswith(".csv"):
                files.append(f)
    pool = ThreadPool(4)
    results = pool.map(performEvaluations, files)
    pool.close()
    pool.join()
    fileutils.writeCSV("d:/data/results/normaldist.csv", results)
def performsSlidingWindowForecast(params, minpercentile=5, training_window=30, input_window=3000, predic_window=30): ''' Input window = 250 hours = 250*12 = 3000 look ahead window 60 samples = 5 hours = 720min/5 = 60 ''' filename, METHOD, TYPE, OUTPUT = params[0:4] #Wikidata # data = np.genfromtxt(filename) # data = data/np.max(data) lastFc = None if TYPE == 'pageviews' or TYPE == 'network': data = np.nan_to_num(np.genfromtxt(filename.replace(".csv",""))).ravel() data = data/np.max(data) else: data = np.nan_to_num(np.genfromtxt(filename, delimiter=',', skip_header=1)[:,1]).ravel() minimum = np.percentile(data,minpercentile) N = len(data) result = [] print filename, "started..." for strIndex in range(0,N-input_window - predic_window, predic_window): if strIndex == 0: y = data[:input_window] if METHOD == 'ar': model = AR_model.AR_model(y, order=training_window) elif METHOD == 'ma': model = MA_model.MA_model(y,order=training_window) elif METHOD == 'hw': model = HW_model.HW_model(y, minimum, 'additive') elif METHOD == 'markov1': model = Markov_model.Markov_model(y, order=1) elif METHOD == 'markov2': model = Markov_model.Markov_model(y, order=2) elif METHOD == 'press': model = Press_model.Press_model(y) elif METHOD == 'agile': model = Wavelet_model.Wavelet_model(y) elif METHOD == 'fnn': filename, METHOD, TYPE, OUTPUT, INPUT, curEta, curLmda = params[:7] curMachine = filename.split('/')[-1].replace(".csv",".xml") if TYPE.startswith("memory"): curMachine = curMachine.replace("memory", "cpu") model = Fnn_model.Fnn_model(data=data, machineID = curMachine, netPath="../data/"+TYPE.replace("memory", "cpu")+"_networks/"+curMachine, eta=curEta, lmda=curLmda) else: model = Fnn_model.Fnn_model(data=data, machineID = curMachine, netPath="../data/"+TYPE+"_networks/"+curMachine, eta=curEta, lmda=curLmda) elif METHOD == 'rnn': filename, METHOD, TYPE, OUTPUT, INPUT, curEta, curLmda = params[:7] curMachine = filename.split('/')[-1] if TYPE.startswith("memory"): curMachine = 
curMachine.replace("memory", "cpu") model = Rnn_model.Rnn_model(data=data, machineID = curMachine, netPath="../data/"+TYPE.replace("memory", "cpu")+"_networks/"+curMachine.replace(".csv",".xml"), eta=curEta, lmda=curLmda) else: model = Rnn_model.Rnn_model(data=data, machineID = curMachine, netPath="../data/"+TYPE+"_rnn_networks/"+curMachine.replace(".csv",".xml"), eta=curEta, lmda=curLmda) elif METHOD == 'entwine': filename, METHOD, TYPE, OUTPUT, INPUT, curEta, curLmda = params[:7] curMachine = filename.split('/')[-1] data2 = np.nan_to_num(np.genfromtxt(filename.replace("cpu", "memory"), delimiter=',', skip_header=1)[:,1]).ravel() model = Entwine_model.Entwine_model([data, data2], machineID = curMachine, netPath="../data/entwine_networks/"+curMachine.replace(".csv",".xml"), eta=curEta, lmda=curLmda) model.fit() else: if METHOD == 'press': y = data[input_window + strIndex - predic_window:input_window + strIndex] else: y = data[strIndex:strIndex+input_window] model.update(y) p = model.predict(predic_window) y_pred = np.atleast_2d(p) y_pred = np.reshape(y_pred, (predic_window,1)) if METHOD == 'rnn': if lastFc is not None: y_pred[0,0] = lastFc lastFc = y_pred[-1,0] y_pred[y_pred[:,0]<0,0] = minimum result.append(y_pred[:,0]) f = filename.split('/')[-1] fileutils.writeCSV(OUTPUT+TYPE+"_"+METHOD+"/"+f, np.atleast_2d(result)) print filename, "complete!"
def main(): global METHOD global TYPE global OUTPUT global INPUT # Launch main menu # main_menu() print "Comparing Forecasting methods:\n" print "Please complete the following:" print "Enter the type of data used in the evaluation:" TYPE = raw_input(" >> ") print "The base INPUT directory is:", INPUT+TYPE print "and OUTPUT directory is:", OUTPUT+TYPE print "Would you like to change it? y or n" ch = raw_input(" >> ").lower() if ch == 'y': print "Enter the base path for the INPUT directory (without the type):" INPUT = raw_input(" >> ") if not os.path.isdir(INPUT): print "Error: Please try again; INPUT directory:" INPUT = raw_input(" >> ") print "And enter the base path for evaluation OUTPUT directory (without the type):" OUTPUT = raw_input(" >> ") if not os.path.isdir(OUTPUT): print "Error: Please try again; evaluation OUTPUT directory:" OUTPUT = raw_input(" >> ") ######### print "Please choose a method to evaluate:" print "1. Holt-Winters" print "2. Auto-regression" print "3. 1st Markov chain" print "4. 2nd Markov chain" print "5. PRESS" print "6. Agile" print "7. FFNN Model" print "8. RNN Model" print "9. Entwine Model" print "10. Moving Average" print "11. Average combo4 Model" print "12. FFNN combo4 Model" print "13. Weighted Average model" print "0. 
Quit" choice = raw_input(" >> ") ch = choice.lower(); if ch == '': menu_actions['main_menu']() elif ch == '0': exit() else: METHOD = methods_dict[ch] pool = ThreadPool(4) files = fileutils.getFilelist(INPUT+TYPE) params = [] if METHOD =='fnn': if TYPE.startswith("memory"): hyperpath = "../data/"+TYPE.replace("memory", "cpu")+"_networks/hyperparams.csv" else: hyperpath = "../data/"+TYPE+"_networks/hyperparams.csv" hyperparms = np.genfromtxt(hyperpath, delimiter=',', dtype=None) for curRow in hyperparms: if TYPE.startswith("memory"): params.append([INPUT+TYPE+'/'+curRow[0].replace("cpu", "memory").strip("'")+".csv", METHOD, TYPE, OUTPUT, INPUT, curRow[3], curRow[4]]) else: params.append([INPUT+TYPE+'/'+curRow[0].strip("'")+".csv", METHOD, TYPE, OUTPUT, INPUT, curRow[3], curRow[4]]) elif METHOD =='rnn': if TYPE.startswith("memory"): hyperpath = "../data/"+TYPE.replace("memory", "cpu")+"_rnn_networks/hyperparams.csv" else: hyperpath = "../data/"+TYPE+"_rnn_networks/hyperparams.csv" hyperparms = np.genfromtxt(hyperpath, delimiter=',', dtype=None) for curRow in hyperparms: if TYPE.startswith("memory"): params.append([INPUT+TYPE+'/'+curRow[0].replace("cpu", "memory").strip("'")+".csv", METHOD, TYPE, OUTPUT, INPUT, curRow[3], curRow[4]]) else: params.append([INPUT+TYPE+'/'+curRow[0].strip("'")+".csv", METHOD, TYPE, OUTPUT, INPUT, curRow[3], curRow[4]]) elif METHOD == 'entwine': hyperpath = "../data/entwine_networks/hyperparams.csv" hyperparms = np.genfromtxt(hyperpath, delimiter=',', dtype=None) for curRow in hyperparms: params.append([INPUT+TYPE+'/'+curRow[0].strip("'")+".csv", METHOD, TYPE, OUTPUT, INPUT, curRow[3], curRow[4]]) else: for f in files: params.append([f, METHOD, TYPE, OUTPUT, INPUT]) if METHOD == 'avg4' or METHOD == 'combo4' or METHOD == 'wa': # ensembleModel(params[0]) pool.map(ensembleModel,params) pool.close() pool.join() else: # print "skip" # performsSlidingWindowForecast(params[0]) pool.map(performsSlidingWindowForecast, params) pool.close() 
pool.join() pool = ThreadPool(4) results = pool.map(performEvaluations, params) pool.close() pool.join() fileutils.writeCSV(OUTPUT+"results/"+TYPE+"_"+METHOD+".csv", results) print METHOD+" "+ TYPE + " complete" exit()
def ensembleModel(params, types=['ma','ar','fnn','agile'], step=30, input_window=3000): input_size = len(types) filename, METHOD, TYPE, OUTPUT = params[0:4] filename = filename.split('/')[-1] filename, METHOD, TYPE, OUTPUT = params[0:4] filename = filename.split('/')[-1] combine_model = np.genfromtxt(OUTPUT+TYPE+"_"+types[0]+"/"+filename, delimiter=',', usecols=range(0,30)).ravel() truevals = np.nan_to_num(np.genfromtxt(OUTPUT+TYPE+"/"+filename, delimiter=',',skip_header=1, usecols=(1))[:input_window+len(combine_model)]) for t in types[1:]: forecasts = np.genfromtxt(OUTPUT+TYPE+"_"+t+"/"+filename, delimiter=',', usecols=range(0,30)).ravel() combine_model = np.vstack((combine_model, forecasts)) average_fc = np.average(combine_model, axis=0) if METHOD == 'avg4': fileutils.writeCSV(OUTPUT+TYPE+"_"+METHOD+"/"+filename, np.atleast_2d(average_fc).reshape([178,30])) print filename, "complete" return if METHOD == 'combo4' or METHOD =='wa': training = SupervisedDataSet(input_size, 1) for i in range(step): training.appendLinked([combine_model[t][i] for t in range(input_size)], truevals[i+input_window]) besterr = eval.calc_RMSE(truevals[input_window:input_window+step], average_fc[:step]) bestNet = None for i in range(50): if METHOD == 'wa': net = buildNetwork(input_size, 1, hiddenclass=LinearLayer, bias=False) else: net = buildNetwork(input_size, 2, 1, hiddenclass=LinearLayer, bias=False) trainer = BackpropTrainer(net, training, learningrate=0.001, shuffle=False) trainer.trainEpochs(100) err = eval.calc_RMSE(truevals[input_window:input_window+step], net.activateOnDataset(training)) if err < besterr: bestNet = net break combo_fc = average_fc[0:step].tolist() # combo_fc = [] if bestNet == None: combo_fc = average_fc else: for i in range(step, len(combine_model[0]), step): training = SupervisedDataSet(input_size, 1) for j in range(i,i+step): combo_fc.append(bestNet.activate([combine_model[t][j] for t in range(input_size)])[0]) training.appendLinked([combine_model[t][j] for t in 
range(input_size)], truevals[j+input_window]) trainer = BackpropTrainer(bestNet, training, learningrate=0.01, shuffle=False) trainer.trainEpochs(2) result = np.atleast_2d(combo_fc).reshape([178,30]) minimum = np.percentile(truevals,5) result[0,result[0,:] < minimum] = minimum fileutils.writeCSV(OUTPUT+TYPE+"_"+METHOD+"/"+filename, result) print filename, "complete"
def performEvaluations(filename, train_window=3000, overload_dur=5, overload_percentile=70, steps=30):
    """Score the saved AR diskIO forecasts for one machine trace.

    Returns [RMSE, upper/lower accuracies, per-sample accuracies,
    overload-state accuracies] as a flat list.
    """
    forecasts = np.genfromtxt("d:/data/diskio_ar_forecasts/" + filename,
                              delimiter=',', usecols=range(0, steps)).ravel()
    truevals = np.genfromtxt("d:/data/diskio/" + filename,
                             delimiter=',', skip_header=1)[train_window:train_window + len(forecasts), 1]
    threshold = np.percentile(truevals, overload_percentile)
    scores = [eval.calc_RMSE(truevals, forecasts)]
    scores.extend(eval.calc_upper_lower_acc(truevals, forecasts))
    scores.extend(eval.calc_persample_accuracy(truevals, forecasts, threshold))
    scores.extend(eval.calc_overload_states_acc(truevals, forecasts, threshold))
    return scores


if __name__ == '__main__':
    files = []
    for _, _, fs in os.walk("d:/data/diskio/"):
        for f in fs:
            if f.endswith(".csv"):
                files.append(f)
    pool = ThreadPool(4)
    results = pool.map(performEvaluations, files)
    pool.close()
    pool.join()
    fileutils.writeCSV("d:/data/results/diskio_autoregressive.csv", results)
usecols=range(0, 30))).ravel() # # truevals = np.genfromtxt("d:/Wikipage data/"+TYPE+"/"+filename, delimiter=',',skip_header=1)[:train_window+len(forecasts),1] truevals = np.genfromtxt("d:/Wikipage data/" + TYPE + "/" + filename)[:train_window + len(forecasts)] truevals = truevals / np.max(truevals) cur_results.append(eval.calc_RMSE(truevals[train_window:], forecasts)) for val in eval.calc_upper_lower_acc(truevals[train_window:], forecasts): cur_results.append(val) return cur_results if __name__ == '__main__': files = [] pool = ThreadPool(4) root = "d:/Wikipage data/" + TYPE + "/" for _, _, fs in os.walk(root): for f in fs: files.append(f) # performEvaluations(files[0]) results = pool.map(performEvaluations, files) pool.close() pool.join() fileutils.writeCSV( "d:/Wikipage data/results/" + TYPE + "_" + METHOD + ".csv", results) print METHOD + " " + TYPE + " complete"
def performEvaluations(filename, train_window=3000, overload_dur=5, overload_percentile=70, steps=30):
    """Score the saved Holt-Winters diskIO forecasts for one machine trace.

    Returns [RMSE, upper/lower accuracies, per-sample accuracies,
    overload-state accuracies] as a flat list.
    """
    forecasts = np.genfromtxt("d:/data/diskio_hw_forecasts/" + filename,
                              delimiter=',', usecols=range(0, steps)).ravel()
    truevals = np.genfromtxt("d:/data/diskio/" + filename,
                             delimiter=',', skip_header=1)[train_window:train_window + len(forecasts), 1]
    threshold = np.percentile(truevals, overload_percentile)
    scores = [eval.calc_RMSE(truevals, forecasts)]
    scores.extend(eval.calc_upper_lower_acc(truevals, forecasts))
    scores.extend(eval.calc_persample_accuracy(truevals, forecasts, threshold))
    scores.extend(eval.calc_overload_states_acc(truevals, forecasts, threshold))
    return scores


if __name__ == '__main__':
    files = []
    for _, _, fs in os.walk("d:/data/diskio/"):
        for f in fs:
            if f.endswith(".csv"):
                files.append(f)
    pool = ThreadPool(4)
    results = pool.map(performEvaluations, files)
    pool.close()
    pool.join()
    fileutils.writeCSV("d:/data/results/diskio_holtwinters.csv", results)
delimiter=',', skip_header=1)[train_window:train_window + len(forecasts), 1] threshold = np.percentile(truevals, overload_percentile) cur_results.append(eval.calc_RMSE(truevals, forecasts)) for val in eval.calc_upper_lower_acc(truevals, forecasts): cur_results.append(val) for val in eval.calc_persample_accuracy(truevals, forecasts, threshold): cur_results.append(val) for val in eval.calc_overload_states_acc(truevals, forecasts, threshold): cur_results.append(val) return cur_results if __name__ == '__main__': files = [] for _, _, fs in os.walk("d:/data/cpuRate/"): for f in fs: if f.endswith(".csv"): files.append(f) pool = ThreadPool(4) results = pool.map(performEvaluations, files) pool.close() pool.join() fileutils.writeCSV("d:/data/results/normaldist.csv", results)