Example #1
def plotOrigVsDetrend():
    data = constructData()
    # Original time series (excluding the final year)
    origY = data[1][0:len(data[1])-365]
    # Detrended time series
    indices = np.arange(len(data[1])-365)
    detrendY = statistics.detrend(indices,data[1][0:len(data[1])-365])[0]

    visualizer.comparisonPlot(2009,1,1,[origY,detrendY],
        ["Original","Detrended"],plotName="Aggregate Electric Load : Original & Detrended", yAxisName="Kilowatts")
Example #2
def plotDetrended():
    data = constructData()
    # Plot of data after detrending with least squares regression
    indices = np.arange(len(data[1]))
    detrendY = statistics.detrend(indices,data[1])[0]
    visualizer.yearlyPlot(detrendY,
        2009,1,1,"Detrended Aggregate Electricity Demand","Residual Kilowatts")
Example #3
def plotOriginal():
    data = constructData()
    # Plot of aggregate electricity demand over the past 5 years
    section = data[1][0:len(data[1]) - 365]
    visualizer.yearlyPlot(section, 2009, 1, 1,
                          "Average Total Electricity Load : 2009-2013",
                          "Kilowatts")
Example #4
def plotDetrended():
    data = constructData()
    # Plot of data after detrending with least squares regression
    indices = np.arange(len(data[1]))
    detrendY = statistics.detrend(indices, data[1])[0]
    visualizer.yearlyPlot(detrendY, 2009, 1, 1,
                          "Detrended Aggregate Electricity Demand",
                          "Residual Kilowatts")
Example #5
def suppVectorRegress():

    kernelList = ["linear","rbf",polyKernel]
    names = ["linear","radial basis","poly"]
    preds = []

    # Retrieve time series data & apply preprocessing
    data = constructData()

    # Hold out the final 89 days of the series as the test set
    cutoff = len(data[0])-89  # predict March
    xTrain = data[0][0:cutoff]
    yTrain = data[1][0:cutoff]
    xTest = data[0][cutoff:]
    yTest = data[1][cutoff:]
    # Fill in missing values denoted by zeroes as an average of
    # both neighbors
    statistics.estimateMissing(xTrain,0.0)
    statistics.estimateMissing(xTest,0.0)

    # Logarithmically scale the data
    xTrain = [[math.log(y) for y in x] for x in xTrain]
    xTest = [[math.log(y) for y in x] for x in xTest]
    yTrain = [math.log(x) for x in yTrain]

    # Detrend the time series
    indices = np.arange(len(data[1]))
    trainIndices = indices[0:cutoff]
    testIndices = indices[cutoff:]
    detrended,slope,intercept = statistics.detrend(trainIndices,yTrain)
    yTrain = detrended
    for gen in range(len(kernelList)):

        # Use SVR to predict test observations based upon training observations
        pred = svrPredictions(xTrain,yTrain,xTest,kernelList[gen])
        # Add the trend back into the predictions
        trendedPred = statistics.reapplyTrend(testIndices,pred,slope,intercept)
        # Reverse the normalization
        trendedPred = [math.exp(x) for x in trendedPred]
        # Compute the NRMSE
        err = statistics.normRmse(yTest,trendedPred)

        print "The Normalized Root-Mean Square Error is " + str(err) + " using kernel " + names[gen] + "..."

        preds.append(trendedPred)

    names.append("actual")
    preds.append(yTest)

    visualizer.comparisonPlot(2014,1,1,preds,names,plotName="Support Vector Regression Load Predictions vs. Actual", 
        yAxisName="Predicted Kilowatts")
Example #6
def plotOrigVsDetrend():
    data = constructData()
    # Original time series (excluding the final year)
    origY = data[1][0:len(data[1]) - 365]
    # Detrended time series
    indices = np.arange(len(data[1]) - 365)
    detrendY = statistics.detrend(indices, data[1][0:len(data[1]) - 365])[0]

    visualizer.comparisonPlot(
        2009,
        1,
        1,
        [origY, detrendY],
        ["Original", "Detrended"],
        plotName="Aggregate Electric Load : Original & Detrended",
        yAxisName="Kilowatts")
Example #7
def suppVectorRegress():

    kernelList = ["linear","rbf",polyKernel]
    names = ["linear","radial basis","poly"]
    preds = []

    # Retrieve time series data & apply preprocessing
    data = constructData()

    # 2014 had 365 days, but we take the last 364 days since
    # the last day has no numerical value
    cutoff = len(data[0])-364
    xTrain = data[0][0:cutoff]
    yTrain = data[1][0:cutoff]
    xTest = data[0][cutoff:]
    yTest = data[1][cutoff:]

    # Fill in missing values denoted by zeroes as an average of
    # both neighbors
    statistics.estimateMissing(xTrain,0.0)
    statistics.estimateMissing(xTest,0.0)

    # Logarithmically scale the data
    xTrain = [[math.log(y) for y in x] for x in xTrain]
    xTest = [[math.log(y) for y in x] for x in xTest]
    yTrain = [math.log(x) for x in yTrain]

    # Detrend the time series
    indices = np.arange(len(data[1]))
    trainIndices = indices[0:cutoff]
    testIndices = indices[cutoff:]
    detrended,slope,intercept = statistics.detrend(trainIndices,yTrain)
    yTrain = detrended

    for gen in range(len(kernelList)):

        # Use SVR to predict test observations based upon training observations
        pred = svrPredictions(xTrain,yTrain,xTest,kernelList[gen])
        # Add the trend back into the predictions
        trendedPred = statistics.reapplyTrend(testIndices,pred,slope,intercept)
        # Reverse the normalization
        trendedPred = [math.exp(x) for x in trendedPred]
        # Compute the NRMSE
        err = statistics.normRmse(yTest,trendedPred)

        print "The Normalized Root-Mean Square Error is " + str(err) + " using kernel " + names[gen] + "..."

        preds.append(trendedPred)

    names.append("actual")
    preds.append(yTest)

    visualizer.comparisonPlot(2014,1,1,preds,names,plotName="Support Vector Regression Load Predictions vs. Actual", 
        yAxisName="Predicted Kilowatts")
Example #8
def neuralNetwork():

    # Retrieve time series data & apply preprocessing
    data = constructData()
    # Hold out the final 89 days of the series as the test set
    cutoff = len(data[0]) - 89
    xTrain = data[0][0:cutoff]
    yTrain = data[1][0:cutoff]
    xTest = data[0][cutoff:]
    yTest = data[1][cutoff:]

    # Fill in missing values denoted by zeroes as an average of
    # both neighbors
    statistics.estimateMissing(xTrain, 0.0)
    statistics.estimateMissing(xTest, 0.0)

    # Logarithmically scale the data
    xTrain = [[math.log(y) for y in x] for x in xTrain]
    xTest = [[math.log(y) for y in x] for x in xTest]
    yTrain = [math.log(x) for x in yTrain]

    # Detrend the time series
    indices = np.arange(len(data[1]))
    trainIndices = indices[0:cutoff]
    testIndices = indices[cutoff:]
    detrended, slope, intercept = statistics.detrend(trainIndices, yTrain)
    yTrain = detrended

    dimensions = [6, 10, 12]
    neurons = [30, 50, 50]

    names = []
    for x in range(len(dimensions)):
        s = "d=" + str(dimensions[x]) + ",h=" + str(neurons[x])
        names.append(s)

    preds = []

    for x in range(len(dimensions)):

        # Perform dimensionality reduction on the feature vectors
        pca = PCA(n_components=dimensions[x])
        pca.fit(xTrain)
        xTrainRed = pca.transform(xTrain)
        xTestRed = pca.transform(xTest)

        pred = fit_predict(xTrainRed, yTrain, xTestRed, 40, neurons[x])

        # Add the trend back into the predictions
        trendedPred = statistics.reapplyTrend(testIndices, pred, slope,
                                              intercept)
        # Reverse the normalization
        trendedPred = [math.exp(x) for x in trendedPred]
        # Compute the NRMSE
        err = statistics.normRmse(yTest, trendedPred)

        # Append these predictions to the list of per-model predictions
        preds.append(trendedPred)

        print("The NRMSE for the neural network is " + str(err) + "...")

    preds.append(yTest)
    names.append("actual")

    visualizer.comparisonPlot(
        2014,
        1,
        1,
        preds,
        names,
        plotName="Neural Network Load Predictions vs. Actual",
        yAxisName="Predicted Kilowatts")
Example #9
def plotPeriodogram():
    data = constructData()
    visualizer.periodogramPlot(
        data[1][len(data[1]) - 730:len(data[1]) - 365],
        "Periodogram of Average Total Electricity Load : 2013")
Example #10
def plotLag():
    data = constructData()
    visualizer.lagPlot(data[1][0:len(data[1]) - 365],
                       "Average Total Electricity Load Lag : 2009-2013")
Example #11
def plotCorrelogram():
    data = constructData()
    visualizer.autoCorrPlot(
        data[1][len(data[1]) - 730:len(data[1]) - 365],
        "Average Total Electricity Load Autocorrelations : 2013")
Example #12
def plotOriginal():
    data = constructData()
    # Plot of aggregate electricity demand over the past 5 years
    section = data[1][0:len(data[1])-365]
    visualizer.yearlyPlot(section,
        2009,1,1,"Average Total Electricity Load : 2009-2013","Kilowatts")
Example #13
def plotCorrelogram():
    data = constructData()
    visualizer.autoCorrPlot(data[1][len(data[1])-730:len(data[1])-365],"Average Total Electricity Load Autocorrelations : 2013")
Example #14
def clustering():

    # Retrieve time series data & apply preprocessing
    data = constructData()

    # 2014 had 365 days, but we take the last 364 days since
    # the last day has no numerical value
    cutoff = len(data[0])-364
    xTrain = data[0][0:cutoff]
    yTrain = data[1][0:cutoff]
    xTest = data[0][cutoff:]
    yTest = data[1][cutoff:]

    # Fill in missing values denoted by zeroes as an average of
    # both neighbors
    statistics.estimateMissing(xTrain,0.0)
    statistics.estimateMissing(xTest,0.0)

    # Logarithmically scale the data
    xTrain = [[math.log(y) for y in x] for x in xTrain]
    xTest = [[math.log(y) for y in x] for x in xTest]
    yTrain = [math.log(x) for x in yTrain]

    # Detrend the time series
    indices = np.arange(len(data[1]))
    trainIndices = indices[0:cutoff]
    testIndices = indices[cutoff:]
    detrended,slope,intercept = statistics.detrend(trainIndices,yTrain)
    yTrain = detrended

    # Compute centroids and labels of data
    cward_7,lward_7 = hierarchicalClustering(xTrain,7)
    cward_365,lward_365 = hierarchicalClustering(xTrain,365)

    ckmeans_7,lkmeans_7 = kMeansClustering(xTrain,7)
    ckmeans_365,lkmeans_365 = kMeansClustering(xTrain,365)

    c = [cward_7,cward_365,ckmeans_7,ckmeans_365]
    l = [lward_7,lward_365,lkmeans_7,lkmeans_365]

    algNames = ["agglomerative(7)","agglomerative(365)","k-means(7)","k-means(365)"]

    preds = []

    for t in range(len(c)):
        # The centroids computed by the current clustering algorithm
        centroids = c[t]
        # The labels for the examples defined by the current clustering assignment
        labels = l[t]

        # Separate the training samples into cluster sets
        clusterSets = []

        for x in range(len(centroids)):
            clusterSets.append([])
        for x in range(len(labels)):
            # Place the example into its cluster
            clusterSets[labels[x]].append((xTrain[x],yTrain[x]))
        # Compute predictions for each of the test examples
        pred = predictClustering(centroids,clusterSets,xTest,"euclidean")
        # Add the trend back into the predictions
        trendedPred = statistics.reapplyTrend(testIndices,pred,slope,intercept)
        # Reverse the normalization
        trendedPred = [math.exp(x) for x in trendedPred]
        # Compute the NRMSE
        err = statistics.normRmse(yTest,trendedPred)
        # Add to list of predictions
        preds.append(trendedPred)

        print "The Normalized Root-Mean Square Error is " + str(err) + " using algorithm " + algNames[t] + "..."

    algNames.append("actual")
    preds.append(yTest)

    visualizer.comparisonPlot(2014,1,1,preds,algNames, 
        plotName="Clustering Load Predictions vs. Actual", 
        yAxisName="Predicted Kilowatts")
Example #15
def clustering():

    # Retrieve time series data & apply preprocessing
    data = constructData()

    # 2014 had 365 days, but we take the last 364 days since
    # the last day has no numerical value
    cutoff = len(data[0]) - 364
    xTrain = data[0][0:cutoff]
    yTrain = data[1][0:cutoff]
    xTest = data[0][cutoff:]
    yTest = data[1][cutoff:]

    # Fill in missing values denoted by zeroes as an average of
    # both neighbors
    statistics.estimateMissing(xTrain, 0.0)
    statistics.estimateMissing(xTest, 0.0)

    # Logarithmically scale the data
    xTrain = [[math.log(y) for y in x] for x in xTrain]
    xTest = [[math.log(y) for y in x] for x in xTest]
    yTrain = [math.log(x) for x in yTrain]

    # Detrend the time series
    indices = np.arange(len(data[1]))
    trainIndices = indices[0:cutoff]
    testIndices = indices[cutoff:]
    detrended, slope, intercept = statistics.detrend(trainIndices, yTrain)
    yTrain = detrended

    # Compute centroids and labels of data
    cward_7, lward_7 = hierarchicalClustering(xTrain, 7)
    cward_365, lward_365 = hierarchicalClustering(xTrain, 365)

    ckmeans_7, lkmeans_7 = kMeansClustering(xTrain, 7)
    ckmeans_365, lkmeans_365 = kMeansClustering(xTrain, 365)

    c = [cward_7, cward_365, ckmeans_7, ckmeans_365]
    l = [lward_7, lward_365, lkmeans_7, lkmeans_365]

    algNames = [
        "agglomerative(7)", "agglomerative(365)", "k-means(7)", "k-means(365)"
    ]

    preds = []

    for t in range(len(c)):
        # The centroids computed by the current clustering algorithm
        centroids = c[t]
        # The labels for the examples defined by the current clustering assignment
        labels = l[t]

        # Separate the training samples into cluster sets
        clusterSets = []

        for x in range(len(centroids)):
            clusterSets.append([])
        for x in range(len(labels)):
            # Place the example into its cluster
            clusterSets[labels[x]].append((xTrain[x], yTrain[x]))
        # Compute predictions for each of the test examples
        pred = predictClustering(centroids, clusterSets, xTest, "euclidean")
        # Add the trend back into the predictions
        trendedPred = statistics.reapplyTrend(testIndices, pred, slope,
                                              intercept)
        # Reverse the normalization
        trendedPred = [math.exp(x) for x in trendedPred]
        # Compute the NRMSE
        err = statistics.normRmse(yTest, trendedPred)
        # Add to list of predictions
        preds.append(trendedPred)

        print "The Normalized Root-Mean Square Error is " + str(
            err) + " using algorithm " + algNames[t] + "..."

    algNames.append("actual")
    preds.append(yTest)

    visualizer.comparisonPlot(
        2014,
        1,
        1,
        preds,
        algNames,
        plotName="Clustering Load Predictions vs. Actual",
        yAxisName="Predicted Kilowatts")
Example #16
def plotPeriodogram():
    data = constructData()
    visualizer.periodogramPlot(data[1][len(data[1])-730:len(data[1])-365],
        "Periodogram of Average Total Electricity Load : 2013")
Example #17
def plotLag():
    data = constructData()
    visualizer.lagPlot(data[1][0:len(data[1])-365],"Average Total Electricity Load Lag : 2009-2013")
Example #18
def gaussianProcesses():

    corrMods = [
        'cubic', 'squared_exponential', 'absolute_exponential', 'linear'
    ]
    preds = []

    # Retrieve time series data & apply preprocessing
    data = constructData()

    # 2014 had 365 days, but we take the last 364 days since
    # the last day has no numerical value
    cutoff = len(data[0]) - 364
    xTrain = data[0][0:cutoff]
    yTrain = data[1][0:cutoff]
    xTest = data[0][cutoff:]
    yTest = data[1][cutoff:]

    # Fill in missing values denoted by zeroes as an average of
    # both neighbors
    statistics.estimateMissing(xTrain, 0.0)
    statistics.estimateMissing(xTest, 0.0)

    # Logarithmically scale the data
    xTrain = [[math.log(y) for y in x] for x in xTrain]
    xTest = [[math.log(y) for y in x] for x in xTest]
    yTrain = [math.log(x) for x in yTrain]

    # Detrend the time series
    indices = np.arange(len(data[1]))
    trainIndices = indices[0:cutoff]
    testIndices = indices[cutoff:]
    detrended, slope, intercept = statistics.detrend(trainIndices, yTrain)
    yTrain = detrended

    for gen in range(len(corrMods)):

        # Use GPR to predict test observations based upon training observations
        pred = gaussProcPred(xTrain, yTrain, xTest, corrMods[gen])
        # Add the trend back into the predictions
        trendedPred = statistics.reapplyTrend(testIndices, pred, slope,
                                              intercept)
        # Reverse the normalization
        trendedPred = [math.exp(x) for x in trendedPred]
        # Compute the NRMSE
        err = statistics.normRmse(yTest, trendedPred)

        print "The Normalized Root-Mean Square Error is " + str(
            err) + " using covariance function " + corrMods[gen] + "..."

        preds.append(trendedPred)

    corrMods.append("actual")
    data = constructData()
    cutoff = len(data) - 364
    yTest = data[1][cutoff:]
    preds.append(yTest)

    visualizer.comparisonPlot(
        2014,
        1,
        1,
        preds,
        corrMods,
        plotName="Gaussian Process Regression Load Predictions vs. Actual",
        yAxisName="Predicted Kilowatts")
Example #19
def neuralNetwork():
  
  # Retrieve time series data & apply preprocessing
  data = constructData()

  # 2014 had 365 days, but we take the last 364 days since
  # the last day has no numerical value
  cutoff = len(data[0])-364
  xTrain = data[0][0:cutoff]
  yTrain = data[1][0:cutoff]
  xTest = data[0][cutoff:]
  yTest = data[1][cutoff:]

  # Fill in missing values denoted by zeroes as an average of
  # both neighbors
  statistics.estimateMissing(xTrain,0.0)
  statistics.estimateMissing(xTest,0.0)

  # Logarithmically scale the data
  xTrain = [[math.log(y) for y in x] for x in xTrain]
  xTest = [[math.log(y) for y in x] for x in xTest]
  yTrain = [math.log(x) for x in yTrain]

  # Detrend the time series
  indices = np.arange(len(data[1]))
  trainIndices = indices[0:cutoff]
  testIndices = indices[cutoff:]
  detrended,slope,intercept = statistics.detrend(trainIndices,yTrain)
  yTrain = detrended

  dimensions = [6,10,12]
  neurons = [30,50,50]

  names = []
  for x in range(len(dimensions)):
    s = "d=" + str(dimensions[x]) + ",h=" + str(neurons[x])
    names.append(s)

  preds = []

  for x in range(len(dimensions)):

    # Perform dimensionality reduction on the feature vectors
    pca = PCA(n_components=dimensions[x])
    pca.fit(xTrain)
    xTrainRed = pca.transform(xTrain)
    xTestRed = pca.transform(xTest)

    pred = fit_predict(xTrainRed,yTrain,xTestRed,40,neurons[x])

    # Add the trend back into the predictions
    trendedPred = statistics.reapplyTrend(testIndices,pred,slope,intercept)
    # Reverse the normalization
    trendedPred = [math.exp(x) for x in trendedPred]
    # Compute the NRMSE
    err = statistics.normRmse(yTest,trendedPred)
    
    # Append these predictions to the list of per-model predictions
    preds.append(trendedPred)

    print("The NRMSE for the neural network is " + str(err) + "...")

  preds.append(yTest)
  names.append("actual")

  visualizer.comparisonPlot(2014,1,1,preds,names,plotName="Neural Network Load Predictions vs. Actual", 
        yAxisName="Predicted Kilowatts")