def suppVectorRegress():
    kernelList = ["linear", "rbf", polyKernel]
    names = ["linear", "radial basis", "poly"]
    preds = []
    # Retrieve time series data & apply preprocessing
    data = constructData()
    # Hold out the last 89 days for testing (predict March)
    cutoff = len(data[0]) - 89
    xTrain = data[0][0:cutoff]
    yTrain = data[1][0:cutoff]
    xTest = data[0][cutoff:]
    yTest = data[1][cutoff:]
    # Fill in missing values denoted by zeroes as an average of
    # both neighbors
    statistics.estimateMissing(xTrain, 0.0)
    statistics.estimateMissing(xTest, 0.0)
    # Logarithmically scale the data
    xTrain = [[math.log(y) for y in x] for x in xTrain]
    xTest = [[math.log(y) for y in x] for x in xTest]
    yTrain = [math.log(x) for x in yTrain]
    # Detrend the time series
    indices = np.arange(len(data[1]))
    trainIndices = indices[0:cutoff]
    testIndices = indices[cutoff:]
    detrended, slope, intercept = statistics.detrend(trainIndices, yTrain)
    yTrain = detrended
    for gen in range(len(kernelList)):
        # Use SVR to predict test observations based upon training observations
        pred = svrPredictions(xTrain, yTrain, xTest, kernelList[gen])
        # Add the trend back into the predictions
        trendedPred = statistics.reapplyTrend(testIndices, pred, slope, intercept)
        # Reverse the logarithmic scaling
        trendedPred = [math.exp(x) for x in trendedPred]
        # Compute the NRMSE
        err = statistics.normRmse(yTest, trendedPred)
        print("The Normalized Root-Mean Square Error is " + str(err) +
              " using kernel " + names[gen] + "...")
        preds.append(trendedPred)
    names.append("actual")
    preds.append(yTest)
    visualizer.comparisonPlot(
        2014, 1, 1, preds, names,
        plotName="Support Vector Regression Load Predictions vs. Actual",
        yAxisName="Predicted Kilowatts")
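# The `statistics` helpers called throughout this file (estimateMissing,
# detrend, reapplyTrend) are not defined here. The sketch below is a minimal
# reconstruction inferred from the call sites and comments above; it is an
# assumption, not the original implementation.
import numpy as np

def estimateMissing(xData, sentinel):
    # Replace sentinel-valued (missing) entries in-place with the average of
    # the two neighboring days' values for the same feature.
    for i in range(len(xData)):
        for j in range(len(xData[i])):
            if xData[i][j] == sentinel:
                prev = xData[i - 1][j] if i > 0 else xData[i + 1][j]
                nxt = xData[i + 1][j] if i < len(xData) - 1 else xData[i - 1][j]
                xData[i][j] = (prev + nxt) / 2.0

def detrend(indices, y):
    # Fit a least-squares line through (index, value) and return the
    # residuals along with the fitted slope and intercept.
    slope, intercept = np.polyfit(indices, y, 1)
    residuals = [y[i] - (slope * indices[i] + intercept) for i in range(len(y))]
    return residuals, slope, intercept

def reapplyTrend(indices, pred, slope, intercept):
    # Add the fitted linear trend back onto detrended predictions.
    return [pred[i] + slope * indices[i] + intercept for i in range(len(pred))]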
def suppVectorRegress():
    kernelList = ["linear", "rbf", polyKernel]
    names = ["linear", "radial basis", "poly"]
    preds = []
    # Retrieve time series data & apply preprocessing
    data = constructData()
    # 2014 had 365 days, but we take the last 364 days since
    # the last day has no numerical value
    cutoff = len(data[0]) - 364
    xTrain = data[0][0:cutoff]
    yTrain = data[1][0:cutoff]
    xTest = data[0][cutoff:]
    yTest = data[1][cutoff:]
    # Fill in missing values denoted by zeroes as an average of
    # both neighbors
    statistics.estimateMissing(xTrain, 0.0)
    statistics.estimateMissing(xTest, 0.0)
    # Logarithmically scale the data
    xTrain = [[math.log(y) for y in x] for x in xTrain]
    xTest = [[math.log(y) for y in x] for x in xTest]
    yTrain = [math.log(x) for x in yTrain]
    # Detrend the time series
    indices = np.arange(len(data[1]))
    trainIndices = indices[0:cutoff]
    testIndices = indices[cutoff:]
    detrended, slope, intercept = statistics.detrend(trainIndices, yTrain)
    yTrain = detrended
    for gen in range(len(kernelList)):
        # Use SVR to predict test observations based upon training observations
        pred = svrPredictions(xTrain, yTrain, xTest, kernelList[gen])
        # Add the trend back into the predictions
        trendedPred = statistics.reapplyTrend(testIndices, pred, slope, intercept)
        # Reverse the logarithmic scaling
        trendedPred = [math.exp(x) for x in trendedPred]
        # Compute the NRMSE
        err = statistics.normRmse(yTest, trendedPred)
        print("The Normalized Root-Mean Square Error is " + str(err) +
              " using kernel " + names[gen] + "...")
        preds.append(trendedPred)
    names.append("actual")
    preds.append(yTest)
    visualizer.comparisonPlot(
        2014, 1, 1, preds, names,
        plotName="Support Vector Regression Load Predictions vs. Actual",
        yAxisName="Predicted Kilowatts")
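# suppVectorRegress relies on svrPredictions and polyKernel, neither of which
# is defined in this file. A minimal sketch follows, assuming scikit-learn's
# SVR; the C value and the degree-2 polynomial are illustrative assumptions.
import numpy as np
from sklearn.svm import SVR

def polyKernel(X, Y):
    # Degree-2 polynomial kernel, (x . y + 1)^2, usable as a callable kernel.
    return (np.dot(X, np.transpose(Y)) + 1.0) ** 2

def svrPredictions(xTrain, yTrain, xTest, kernel):
    # Fit an epsilon-SVR with the given kernel (a string such as "linear" or
    # "rbf", or a callable such as polyKernel) and predict the test set.
    clf = SVR(kernel=kernel, C=2.0)
    clf.fit(xTrain, yTrain)
    return clf.predict(xTest)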
def gaussianProcesses():
    corrMods = ['cubic', 'squared_exponential', 'absolute_exponential', 'linear']
    preds = []
    # Retrieve time series data & apply preprocessing
    data = constructData()
    # 2014 had 365 days, but we take the last 364 days since
    # the last day has no numerical value
    cutoff = len(data[0]) - 364
    xTrain = data[0][0:cutoff]
    yTrain = data[1][0:cutoff]
    xTest = data[0][cutoff:]
    yTest = data[1][cutoff:]
    # Fill in missing values denoted by zeroes as an average of
    # both neighbors
    statistics.estimateMissing(xTrain, 0.0)
    statistics.estimateMissing(xTest, 0.0)
    # Logarithmically scale the data
    xTrain = [[math.log(y) for y in x] for x in xTrain]
    xTest = [[math.log(y) for y in x] for x in xTest]
    yTrain = [math.log(x) for x in yTrain]
    # Detrend the time series
    indices = np.arange(len(data[1]))
    trainIndices = indices[0:cutoff]
    testIndices = indices[cutoff:]
    detrended, slope, intercept = statistics.detrend(trainIndices, yTrain)
    yTrain = detrended
    for gen in range(len(corrMods)):
        # Use GPR to predict test observations based upon training observations
        pred = gaussProcPred(xTrain, yTrain, xTest, corrMods[gen])
        # Add the trend back into the predictions
        trendedPred = statistics.reapplyTrend(testIndices, pred, slope, intercept)
        # Reverse the logarithmic scaling
        trendedPred = [math.exp(x) for x in trendedPred]
        # Compute the NRMSE
        err = statistics.normRmse(yTest, trendedPred)
        print("The Normalized Root-Mean Square Error is " + str(err) +
              " using covariance function " + corrMods[gen] + "...")
        preds.append(trendedPred)
    corrMods.append("actual")
    preds.append(yTest)
    visualizer.comparisonPlot(
        2014, 1, 1, preds, corrMods,
        plotName="Gaussian Process Regression Load Predictions vs. Actual",
        yAxisName="Predicted Kilowatts")
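# gaussProcPred is not defined in this file. The corrMods names above match
# the correlation models of scikit-learn's legacy GaussianProcess estimator
# (removed in scikit-learn 0.20), so the sketch below assumes that API; the
# nugget value is an illustrative assumption.
from sklearn.gaussian_process import GaussianProcess

def gaussProcPred(xTrain, yTrain, xTest, corrMod):
    # Fit a Gaussian process with the requested correlation model, using a
    # small nugget for numerical stability, and predict the test set.
    gp = GaussianProcess(corr=corrMod, nugget=1e-8)
    gp.fit(xTrain, yTrain)
    return gp.predict(xTest)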
def neuralNetwork(file, test_perc):
    # Load the uploaded spreadsheet and build feature/target lists
    xData = []
    yData = []
    BASE_DIR = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
    book = xlrd.open_workbook(
        os.path.join(BASE_DIR, "media", "uploadedfile", file))
    sheet = book.sheet_by_index(0)
    for rx in range(1, sheet.nrows - 1):
        row = sheet.row(rx)[1:50]     # features, including temperatures
        rowy = sheet.row(rx + 1)[49]  # total of the next day
        row = [row[x].value for x in range(0, len(row) - 1)]
        rowy = rowy.value
        xData.append(row)
        yData.append(rowy)
    # Hold out the last 89 days for testing
    cutoff = len(xData) - 89
    xTrain = xData[0:cutoff]
    yTrain = yData[0:cutoff]
    xTest = xData[cutoff:]
    yTest = yData[cutoff:]
    # Fill in missing values denoted by zeroes as an average of
    # both neighbors
    statistics.estimateMissing(xTrain, 0.0)
    statistics.estimateMissing(xTest, 0.0)
    # Logarithmically scale the data
    xTrain = [[math.log(y) for y in x] for x in xTrain]
    xTest = [[math.log(y) for y in x] for x in xTest]
    yTrain = [math.log(x) for x in yTrain]
    # Detrend the time series
    indices = np.arange(len(xData))
    trainIndices = indices[0:cutoff]
    testIndices = indices[cutoff:]
    detrended, slope, intercept = statistics.detrend(trainIndices, yTrain)
    yTrain = detrended
    # Candidate (PCA dimension, hidden-neuron) configurations
    dimensions = [6, 10, 12]
    neurons = [30, 50, 50]
    names = []
    for x in range(len(dimensions)):
        names.append("d=" + str(dimensions[x]) + ",h=" + str(neurons[x]))
    preds = []
    for x in range(len(dimensions)):
        # Perform dimensionality reduction on the feature vectors
        pca = PCA(n_components=dimensions[x])
        pca.fit(xTrain)
        xTrainRed = pca.transform(xTrain)
        xTestRed = pca.transform(xTest)
        pred = fit_predict(xTrainRed, yTrain, xTestRed, 40, neurons[x])
        # Add the trend back into the predictions
        trendedPred = statistics.reapplyTrend(testIndices, pred, slope, intercept)
        # Reverse the logarithmic scaling
        trendedPred = [math.exp(z) for z in trendedPred]
        # Compute the NRMSE
        err = statistics.normRmse(yTest, trendedPred)
        # Append computed predictions to list of classifier predictions
        preds.append(trendedPred)
        print("The NRMSE for the neural network is " + str(err) + "...")
    preds.append(yTest)
    names.append("actual")
    # Return the error and predictions of the final configuration
    return err, trendedPred
    # Unreachable after the return above:
    # visualizer.comparisonPlot(
    #     2014, 1, 1, preds, names,
    #     plotName="Neural Network Load Predictions vs. Actual",
    #     yAxisName="Predicted Kilowatts")
def clustering():
    # Retrieve time series data & apply preprocessing
    data = constructData()
    # 2014 had 365 days, but we take the last 364 days since
    # the last day has no numerical value
    cutoff = len(data[0]) - 364
    xTrain = data[0][0:cutoff]
    yTrain = data[1][0:cutoff]
    xTest = data[0][cutoff:]
    yTest = data[1][cutoff:]
    # Fill in missing values denoted by zeroes as an average of
    # both neighbors
    statistics.estimateMissing(xTrain, 0.0)
    statistics.estimateMissing(xTest, 0.0)
    # Logarithmically scale the data
    xTrain = [[math.log(y) for y in x] for x in xTrain]
    xTest = [[math.log(y) for y in x] for x in xTest]
    yTrain = [math.log(x) for x in yTrain]
    # Detrend the time series
    indices = np.arange(len(data[1]))
    trainIndices = indices[0:cutoff]
    testIndices = indices[cutoff:]
    detrended, slope, intercept = statistics.detrend(trainIndices, yTrain)
    yTrain = detrended
    # Compute centroids and labels of the data under four clusterings
    cward_7, lward_7 = hierarchicalClustering(xTrain, 7)
    cward_365, lward_365 = hierarchicalClustering(xTrain, 365)
    ckmeans_7, lkmeans_7 = kMeansClustering(xTrain, 7)
    ckmeans_365, lkmeans_365 = kMeansClustering(xTrain, 365)
    c = [cward_7, cward_365, ckmeans_7, ckmeans_365]
    l = [lward_7, lward_365, lkmeans_7, lkmeans_365]
    algNames = ["agglomerative(7)", "agglomerative(365)",
                "k-means(7)", "k-means(365)"]
    preds = []
    for t in range(len(c)):
        # The centroids computed by the current clustering algorithm
        centroids = c[t]
        # The labels for the examples defined by the current clustering assignment
        labels = l[t]
        # Separate the training samples into cluster sets
        clusterSets = []
        for x in range(len(centroids)):
            clusterSets.append([])
        for x in range(len(labels)):
            # Place the example into its cluster
            clusterSets[labels[x]].append((xTrain[x], yTrain[x]))
        # Compute predictions for each of the test examples
        pred = predictClustering(centroids, clusterSets, xTest, "euclidean")
        # Add the trend back into the predictions
        trendedPred = statistics.reapplyTrend(testIndices, pred, slope, intercept)
        # Reverse the logarithmic scaling
        trendedPred = [math.exp(x) for x in trendedPred]
        # Compute the NRMSE
        err = statistics.normRmse(yTest, trendedPred)
        # Add to list of predictions
        preds.append(trendedPred)
        print("The Normalized Root-Mean Square Error is " + str(err) +
              " using algorithm " + algNames[t] + "...")
    algNames.append("actual")
    preds.append(yTest)
    visualizer.comparisonPlot(
        2014, 1, 1, preds, algNames,
        plotName="Clustering Load Predictions vs. Actual",
        yAxisName="Predicted Kilowatts")
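# The clustering helpers used above (hierarchicalClustering, kMeansClustering,
# predictClustering) are not defined in this file. The sketch below assumes
# scikit-learn and predicts the mean target of the nearest centroid's cluster;
# the original implementations may differ.
import numpy as np
from sklearn.cluster import AgglomerativeClustering, KMeans

def hierarchicalClustering(xTrain, k):
    # Ward-linkage agglomerative clustering; it has no native centroids, so
    # return the mean of each cluster's members as its centroid.
    X = np.asarray(xTrain)
    labels = AgglomerativeClustering(n_clusters=k, linkage="ward").fit_predict(X)
    centroids = [X[labels == i].mean(axis=0) for i in range(k)]
    return centroids, labels

def kMeansClustering(xTrain, k):
    # Standard k-means; centroids come directly from the fitted model.
    model = KMeans(n_clusters=k)
    labels = model.fit_predict(np.asarray(xTrain))
    return model.cluster_centers_, labels

def predictClustering(centroids, clusterSets, xTest, metric):
    # For each test example, find the nearest centroid (only the Euclidean
    # metric is sketched here) and predict the mean target value of the
    # training examples assigned to that cluster.
    preds = []
    for x in xTest:
        dists = [np.linalg.norm(np.asarray(x) - np.asarray(c)) for c in centroids]
        members = clusterSets[int(np.argmin(dists))]
        preds.append(np.mean([y for (_, y) in members]))
    return preds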
def neuralNetwork():
    # Retrieve time series data & apply preprocessing
    data = constructData()
    # Hold out the last 89 days for testing
    cutoff = len(data[0]) - 89
    xTrain = data[0][0:cutoff]
    yTrain = data[1][0:cutoff]
    xTest = data[0][cutoff:]
    yTest = data[1][cutoff:]
    # Fill in missing values denoted by zeroes as an average of
    # both neighbors
    statistics.estimateMissing(xTrain, 0.0)
    statistics.estimateMissing(xTest, 0.0)
    # Logarithmically scale the data
    xTrain = [[math.log(y) for y in x] for x in xTrain]
    xTest = [[math.log(y) for y in x] for x in xTest]
    yTrain = [math.log(x) for x in yTrain]
    # Detrend the time series
    indices = np.arange(len(data[1]))
    trainIndices = indices[0:cutoff]
    testIndices = indices[cutoff:]
    detrended, slope, intercept = statistics.detrend(trainIndices, yTrain)
    yTrain = detrended
    # Candidate (PCA dimension, hidden-neuron) configurations
    dimensions = [6, 10, 12]
    neurons = [30, 50, 50]
    names = []
    for x in range(len(dimensions)):
        names.append("d=" + str(dimensions[x]) + ",h=" + str(neurons[x]))
    preds = []
    for x in range(len(dimensions)):
        # Perform dimensionality reduction on the feature vectors
        pca = PCA(n_components=dimensions[x])
        pca.fit(xTrain)
        xTrainRed = pca.transform(xTrain)
        xTestRed = pca.transform(xTest)
        pred = fit_predict(xTrainRed, yTrain, xTestRed, 40, neurons[x])
        # Add the trend back into the predictions
        trendedPred = statistics.reapplyTrend(testIndices, pred, slope, intercept)
        # Reverse the logarithmic scaling
        trendedPred = [math.exp(z) for z in trendedPred]
        # Compute the NRMSE
        err = statistics.normRmse(yTest, trendedPred)
        # Append computed predictions to list of classifier predictions
        preds.append(trendedPred)
        print("The NRMSE for the neural network is " + str(err) + "...")
    preds.append(yTest)
    names.append("actual")
    visualizer.comparisonPlot(
        2014, 1, 1, preds, names,
        plotName="Neural Network Load Predictions vs. Actual",
        yAxisName="Predicted Kilowatts")
def neuralNetwork(file, days):
    # Load the uploaded spreadsheet and build feature/target lists
    xData = []
    yData = []
    BASE_DIR = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
    book = xlrd.open_workbook(
        os.path.join(BASE_DIR, "media", "uploadedfile", file))
    sheet = book.sheet_by_index(0)
    for rx in range(1, sheet.nrows):
        row = sheet.row(rx)[1:12]  # features, including temperatures
        rowy = sheet.row(rx)[12]   # total of the next day
        row = [row[x].value for x in range(0, len(row))]
        rowy = rowy.value
        xData.append(row)
        yData.append(rowy)
    # Use a 720-day window; the final `days` observations form the test set
    cu = len(xData) - 720
    cutoff = len(xData) - days
    xTrain = xData[cu:cutoff]
    yTrain = yData[cu:cutoff]
    xTest = xData[cutoff:]
    yTest = yData[cutoff:]
    # Fill in missing values denoted by zeroes as an average of
    # both neighbors
    statistics.estimateMissing(xTrain, 0.0)
    statistics.estimateMissing(xTest, 0.0)
    # Logarithmically scale the data
    xTrain = [[math.log(y) for y in x] for x in xTrain]
    xTest = [[math.log(y) for y in x] for x in xTest]
    yTrain = [math.log(x) for x in yTrain]
    # Detrend the time series
    indices = np.arange(len(xData))
    trainIndices = indices[cu:cutoff]
    testIndices = indices[cutoff:]
    detrended, slope, intercept = statistics.detrend(trainIndices, yTrain)
    yTrain = detrended
    # Candidate (PCA dimension, hidden-neuron) configurations
    dimensions = [7, 8, 10, 11]
    neurons = [300, 500, 500, 500]
    names = []
    for x in range(len(dimensions)):
        names.append("d=" + str(dimensions[x]) + ",h=" + str(neurons[x]))
    preds = []
    trendedPred = []
    accu = []
    mse = []
    for x in range(len(dimensions)):
        # Perform dimensionality reduction on the feature vectors
        pca = PCA(n_components=dimensions[x])
        pca.fit(xTrain)
        xTrainRed = pca.transform(xTrain)
        xTestRed = pca.transform(xTest)
        pred = fit_predict(xTrainRed, yTrain, xTestRed, 100, neurons[x])
        # Add the trend back into the predictions
        temp1 = statistics.reapplyTrend(testIndices, pred, slope, intercept)
        # Reverse the logarithmic scaling
        trendedPred.append([math.exp(z) for z in temp1])
        # Compute the NRMSE and the MAPE-based accuracy
        err = statistics.normRmse(yTest, trendedPred[x])
        err2 = statistics.mape(yTest, trendedPred[x])
        accu.append((1 - err2) * 100)
        mse.append(math.pow(err, 2))
        # Append computed predictions to list of classifier predictions
        preds.append(trendedPred[x])
        print("Error Rate :" + str(err2) + "\n\n")
    # Return the metrics and predictions of the most accurate configuration
    max_val = max(accu)
    index_max = accu.index(max_val)
    return mse[index_max], accu[index_max], trendedPred[index_max], yTest
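# statistics.normRmse and statistics.mape are not shown in this file. The
# sketch below assumes the RMSE is normalized by the range of the actual
# series, which is one common convention; the original may normalize differently.
import math

def normRmse(actual, pred):
    # Root-mean-square error, normalized by the range of the actual values.
    n = len(actual)
    rmse = math.sqrt(sum((actual[i] - pred[i]) ** 2 for i in range(n)) / n)
    return rmse / (max(actual) - min(actual))

def mape(actual, pred):
    # Mean absolute percentage error as a fraction, so (1 - mape) * 100 is
    # the percent accuracy reported above.
    n = len(actual)
    return sum(abs((actual[i] - pred[i]) / actual[i]) for i in range(n)) / n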
def neuralNetwork():
    # Retrieve time series data & apply preprocessing
    xData = []
    yData = []
    book = xlrd.open_workbook("data/data_with_9_variable.xlsx")
    sheet = book.sheet_by_index(0)
    for rx in range(1, sheet.nrows):
        row = sheet.row(rx)[1:12]  # features, including temperatures
        rowy = sheet.row(rx)[12]   # total of the next day
        row = [row[x].value for x in range(0, len(row))]
        rowy = rowy.value
        xData.append(row)
        yData.append(rowy)
    # Use a 720-day window; the final 30 observations form the test set
    cu = len(xData) - 720
    cutoff = len(xData) - 30
    xTrain = xData[cu:cutoff]
    yTrain = yData[cu:cutoff]
    xTest = xData[cutoff:]
    yTest = yData[cutoff:]
    # Fill in missing values denoted by zeroes as an average of
    # both neighbors
    statistics.estimateMissing(xTrain, 0.0)
    statistics.estimateMissing(xTest, 0.0)
    # Logarithmically scale the data
    xTrain = [[math.log(y) for y in x] for x in xTrain]
    xTest = [[math.log(y) for y in x] for x in xTest]
    yTrain = [math.log(x) for x in yTrain]
    # Detrend the time series
    indices = np.arange(len(xData))
    trainIndices = indices[cu:cutoff]
    testIndices = indices[cutoff:]
    detrended, slope, intercept = statistics.detrend(trainIndices, yTrain)
    yTrain = detrended
    # Candidate (PCA dimension, hidden-neuron) configurations
    dimensions = [7, 8, 10, 11]
    neurons = [300, 500, 500, 500]
    names = []
    for x in range(len(dimensions)):
        names.append("d=" + str(dimensions[x]) + ",h=" + str(neurons[x]))
    preds = []
    for x in range(len(dimensions)):
        # Perform dimensionality reduction on the feature vectors
        pca = PCA(n_components=dimensions[x])
        pca.fit(xTrain)
        xTrainRed = pca.transform(xTrain)
        xTestRed = pca.transform(xTest)
        pred = fit_predict(xTrainRed, yTrain, xTestRed, 100, neurons[x])
        # Add the trend back into the predictions
        trendedPred = statistics.reapplyTrend(testIndices, pred, slope, intercept)
        # Reverse the logarithmic scaling
        trendedPred = [math.exp(z) for z in trendedPred]
        # Compute the NRMSE and the MAPE-based accuracy
        err = statistics.normRmse(yTest, trendedPred)
        err2 = statistics.mape(yTest, trendedPred)
        # Append computed predictions to list of classifier predictions
        preds.append(trendedPred)
        print("The NRMSE for the neural network is " + str(err) + "...")
        print("The %Accuracy for the neural network is " +
              str((1 - err2) * 100) + "...\n")
    preds.append(yTest)
    names.append("actual")
    visualizer.comparisonPlot(
        2014, 1, 1, preds, names,
        plotName="Neural Network Load Predictions vs. Actual",
        yAxisName="Predicted Kilowatts")
def neuralNetwork():
    # Retrieve time series data & apply preprocessing
    data = constructData()
    # 2014 had 365 days, but we take the last 364 days since
    # the last day has no numerical value
    cutoff = len(data[0]) - 364
    xTrain = data[0][0:cutoff]
    yTrain = data[1][0:cutoff]
    xTest = data[0][cutoff:]
    yTest = data[1][cutoff:]
    # Fill in missing values denoted by zeroes as an average of
    # both neighbors
    statistics.estimateMissing(xTrain, 0.0)
    statistics.estimateMissing(xTest, 0.0)
    # Logarithmically scale the data
    xTrain = [[math.log(y) for y in x] for x in xTrain]
    xTest = [[math.log(y) for y in x] for x in xTest]
    yTrain = [math.log(x) for x in yTrain]
    # Detrend the time series
    indices = np.arange(len(data[1]))
    trainIndices = indices[0:cutoff]
    testIndices = indices[cutoff:]
    detrended, slope, intercept = statistics.detrend(trainIndices, yTrain)
    yTrain = detrended
    # Candidate (PCA dimension, hidden-neuron) configurations
    dimensions = [6, 10, 12]
    neurons = [30, 50, 50]
    names = []
    for x in range(len(dimensions)):
        names.append("d=" + str(dimensions[x]) + ",h=" + str(neurons[x]))
    preds = []
    for x in range(len(dimensions)):
        # Perform dimensionality reduction on the feature vectors
        pca = PCA(n_components=dimensions[x])
        pca.fit(xTrain)
        xTrainRed = pca.transform(xTrain)
        xTestRed = pca.transform(xTest)
        pred = fit_predict(xTrainRed, yTrain, xTestRed, 40, neurons[x])
        # Add the trend back into the predictions
        trendedPred = statistics.reapplyTrend(testIndices, pred, slope, intercept)
        # Reverse the logarithmic scaling
        trendedPred = [math.exp(z) for z in trendedPred]
        # Compute the NRMSE
        err = statistics.normRmse(yTest, trendedPred)
        # Append computed predictions to list of classifier predictions
        preds.append(trendedPred)
        print("The NRMSE for the neural network is " + str(err) + "...")
    preds.append(yTest)
    names.append("actual")
    visualizer.comparisonPlot(
        2014, 1, 1, preds, names,
        plotName="Neural Network Load Predictions vs. Actual",
        yAxisName="Predicted Kilowatts")
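# fit_predict, the network helper called as fit_predict(xTrain, yTrain, xTest,
# epochs, neurons), is not defined in this file and its original library is
# not shown. The sketch below uses scikit-learn's MLPRegressor as a stand-in,
# with max_iter playing the role of the epoch count; this is an assumption,
# not the original implementation.
from sklearn.neural_network import MLPRegressor

def fit_predict(xTrain, yTrain, xTest, epochs, neurons):
    # Train a single-hidden-layer feedforward network and predict the test set.
    net = MLPRegressor(hidden_layer_sizes=(neurons,), max_iter=epochs,
                       solver="adam", random_state=0)
    net.fit(xTrain, yTrain)
    return net.predict(xTest)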