def suppVectorRegress():
    kernelList = ["linear", "rbf", polyKernel]
    names = ["linear", "radial basis", "poly"]
    preds = []
    # Retrieve time series data & apply preprocessing
    data = constructData()
    # Hold out the last 89 days for testing (predict March)
    cutoff = len(data[0]) - 89
    xTrain = data[0][0:cutoff]
    yTrain = data[1][0:cutoff]
    xTest = data[0][cutoff:]
    yTest = data[1][cutoff:]
    # Fill in missing values denoted by zeroes as an average of
    # both neighbors
    statistics.estimateMissing(xTrain, 0.0)
    statistics.estimateMissing(xTest, 0.0)
    # Logarithmically scale the data
    xTrain = [[math.log(y) for y in x] for x in xTrain]
    xTest = [[math.log(y) for y in x] for x in xTest]
    yTrain = [math.log(x) for x in yTrain]
    # Detrend the time series
    indices = np.arange(len(data[1]))
    trainIndices = indices[0:cutoff]
    testIndices = indices[cutoff:]
    detrended, slope, intercept = statistics.detrend(trainIndices, yTrain)
    yTrain = detrended
    for gen in range(len(kernelList)):
        # Use SVR to predict test observations based upon training observations
        pred = svrPredictions(xTrain, yTrain, xTest, kernelList[gen])
        # Add the trend back into the predictions
        trendedPred = statistics.reapplyTrend(testIndices, pred, slope, intercept)
        # Reverse the logarithmic scaling
        trendedPred = [math.exp(x) for x in trendedPred]
        # Compute the NRMSE
        err = statistics.normRmse(yTest, trendedPred)
        print("The Normalized Root-Mean Square Error is " + str(err) +
              " using kernel " + names[gen] + "...")
        preds.append(trendedPred)
    names.append("actual")
    preds.append(yTest)
    visualizer.comparisonPlot(
        2014, 1, 1, preds, names,
        plotName="Support Vector Regression Load Predictions vs. Actual",
        yAxisName="Predicted Kilowatts")
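# The `statistics` helpers called throughout this file (estimateMissing,
# detrend, reapplyTrend) are not defined here. The sketch below is a minimal
# reconstruction inferred from the call sites and comments above; it is an
# assumption, not the original implementation.
import numpy as np

def estimateMissing(xData, sentinel):
    # Replace sentinel-valued (missing) entries in-place with the average of
    # the two neighboring days' values for the same feature.
    for i in range(len(xData)):
        for j in range(len(xData[i])):
            if xData[i][j] == sentinel:
                prev = xData[i - 1][j] if i > 0 else xData[i + 1][j]
                nxt = xData[i + 1][j] if i < len(xData) - 1 else xData[i - 1][j]
                xData[i][j] = (prev + nxt) / 2.0

def detrend(indices, y):
    # Fit a least-squares line through (index, value) and return the
    # residuals along with the fitted slope and intercept.
    slope, intercept = np.polyfit(indices, y, 1)
    residuals = [y[i] - (slope * indices[i] + intercept) for i in range(len(y))]
    return residuals, slope, intercept

def reapplyTrend(indices, pred, slope, intercept):
    # Add the fitted linear trend back onto detrended predictions.
    return [pred[i] + slope * indices[i] + intercept for i in range(len(pred))]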
def suppVectorRegress():
    kernelList = ["linear", "rbf", polyKernel]
    names = ["linear", "radial basis", "poly"]
    preds = []
    # Retrieve time series data & apply preprocessing
    data = constructData()
    # 2014 had 365 days, but we take the last 364 days since
    # the last day has no numerical value
    cutoff = len(data[0]) - 364
    xTrain = data[0][0:cutoff]
    yTrain = data[1][0:cutoff]
    xTest = data[0][cutoff:]
    yTest = data[1][cutoff:]
    # Fill in missing values denoted by zeroes as an average of
    # both neighbors
    statistics.estimateMissing(xTrain, 0.0)
    statistics.estimateMissing(xTest, 0.0)
    # Logarithmically scale the data
    xTrain = [[math.log(y) for y in x] for x in xTrain]
    xTest = [[math.log(y) for y in x] for x in xTest]
    yTrain = [math.log(x) for x in yTrain]
    # Detrend the time series
    indices = np.arange(len(data[1]))
    trainIndices = indices[0:cutoff]
    testIndices = indices[cutoff:]
    detrended, slope, intercept = statistics.detrend(trainIndices, yTrain)
    yTrain = detrended
    for gen in range(len(kernelList)):
        # Use SVR to predict test observations based upon training observations
        pred = svrPredictions(xTrain, yTrain, xTest, kernelList[gen])
        # Add the trend back into the predictions
        trendedPred = statistics.reapplyTrend(testIndices, pred, slope, intercept)
        # Reverse the logarithmic scaling
        trendedPred = [math.exp(x) for x in trendedPred]
        # Compute the NRMSE
        err = statistics.normRmse(yTest, trendedPred)
        print("The Normalized Root-Mean Square Error is " + str(err) +
              " using kernel " + names[gen] + "...")
        preds.append(trendedPred)
    names.append("actual")
    preds.append(yTest)
    visualizer.comparisonPlot(
        2014, 1, 1, preds, names,
        plotName="Support Vector Regression Load Predictions vs. Actual",
        yAxisName="Predicted Kilowatts")
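# suppVectorRegress relies on svrPredictions and polyKernel, neither of which
# is defined in this file. A minimal sketch follows, assuming scikit-learn's
# SVR; the C value and the degree-2 polynomial are illustrative assumptions.
import numpy as np
from sklearn.svm import SVR

def polyKernel(X, Y):
    # Degree-2 polynomial kernel, (x . y + 1)^2, usable as a callable kernel.
    return (np.dot(X, np.transpose(Y)) + 1.0) ** 2

def svrPredictions(xTrain, yTrain, xTest, kernel):
    # Fit an epsilon-SVR with the given kernel (a string such as "linear" or
    # "rbf", or a callable such as polyKernel) and predict the test set.
    clf = SVR(kernel=kernel, C=2.0)
    clf.fit(xTrain, yTrain)
    return clf.predict(xTest)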
def gaussianProcesses():
    corrMods = ['cubic', 'squared_exponential', 'absolute_exponential', 'linear']
    preds = []
    # Retrieve time series data & apply preprocessing
    data = constructData()
    # 2014 had 365 days, but we take the last 364 days since
    # the last day has no numerical value
    cutoff = len(data[0]) - 364
    xTrain = data[0][0:cutoff]
    yTrain = data[1][0:cutoff]
    xTest = data[0][cutoff:]
    yTest = data[1][cutoff:]
    # Fill in missing values denoted by zeroes as an average of
    # both neighbors
    statistics.estimateMissing(xTrain, 0.0)
    statistics.estimateMissing(xTest, 0.0)
    # Logarithmically scale the data
    xTrain = [[math.log(y) for y in x] for x in xTrain]
    xTest = [[math.log(y) for y in x] for x in xTest]
    yTrain = [math.log(x) for x in yTrain]
    # Detrend the time series
    indices = np.arange(len(data[1]))
    trainIndices = indices[0:cutoff]
    testIndices = indices[cutoff:]
    detrended, slope, intercept = statistics.detrend(trainIndices, yTrain)
    yTrain = detrended
    for gen in range(len(corrMods)):
        # Use GPR to predict test observations based upon training observations
        pred = gaussProcPred(xTrain, yTrain, xTest, corrMods[gen])
        # Add the trend back into the predictions
        trendedPred = statistics.reapplyTrend(testIndices, pred, slope, intercept)
        # Reverse the logarithmic scaling
        trendedPred = [math.exp(x) for x in trendedPred]
        # Compute the NRMSE
        err = statistics.normRmse(yTest, trendedPred)
        print("The Normalized Root-Mean Square Error is " + str(err) +
              " using covariance function " + corrMods[gen] + "...")
        preds.append(trendedPred)
    corrMods.append("actual")
    preds.append(yTest)
    visualizer.comparisonPlot(
        2014, 1, 1, preds, corrMods,
        plotName="Gaussian Process Regression Load Predictions vs. Actual",
        yAxisName="Predicted Kilowatts")
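# gaussProcPred is not defined in this file. The corrMods names above match
# the correlation models of scikit-learn's legacy GaussianProcess estimator
# (removed in scikit-learn 0.20), so the sketch below assumes that API; the
# nugget value is an illustrative assumption.
from sklearn.gaussian_process import GaussianProcess

def gaussProcPred(xTrain, yTrain, xTest, corrMod):
    # Fit a Gaussian process with the requested correlation model, using a
    # small nugget for numerical stability, and predict the test set.
    gp = GaussianProcess(corr=corrMod, nugget=1e-8)
    gp.fit(xTrain, yTrain)
    return gp.predict(xTest)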
def neuralNetwork(file, test_perc):
    # Load the uploaded spreadsheet and build feature/target lists
    xData = []
    yData = []
    BASE_DIR = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
    book = xlrd.open_workbook(
        os.path.join(BASE_DIR, "media", "uploadedfile", file))
    sheet = book.sheet_by_index(0)
    for rx in range(1, sheet.nrows - 1):
        row = sheet.row(rx)[1:50]     # features, including temperatures
        rowy = sheet.row(rx + 1)[49]  # total of the next day
        row = [row[x].value for x in range(0, len(row) - 1)]
        rowy = rowy.value
        xData.append(row)
        yData.append(rowy)
    # Hold out the last 89 days for testing
    cutoff = len(xData) - 89
    xTrain = xData[0:cutoff]
    yTrain = yData[0:cutoff]
    xTest = xData[cutoff:]
    yTest = yData[cutoff:]
    # Fill in missing values denoted by zeroes as an average of
    # both neighbors
    statistics.estimateMissing(xTrain, 0.0)
    statistics.estimateMissing(xTest, 0.0)
    # Logarithmically scale the data
    xTrain = [[math.log(y) for y in x] for x in xTrain]
    xTest = [[math.log(y) for y in x] for x in xTest]
    yTrain = [math.log(x) for x in yTrain]
    # Detrend the time series
    indices = np.arange(len(xData))
    trainIndices = indices[0:cutoff]
    testIndices = indices[cutoff:]
    detrended, slope, intercept = statistics.detrend(trainIndices, yTrain)
    yTrain = detrended
    # Candidate (PCA dimension, hidden-neuron) configurations
    dimensions = [6, 10, 12]
    neurons = [30, 50, 50]
    names = []
    for x in range(len(dimensions)):
        names.append("d=" + str(dimensions[x]) + ",h=" + str(neurons[x]))
    preds = []
    for x in range(len(dimensions)):
        # Perform dimensionality reduction on the feature vectors
        pca = PCA(n_components=dimensions[x])
        pca.fit(xTrain)
        xTrainRed = pca.transform(xTrain)
        xTestRed = pca.transform(xTest)
        pred = fit_predict(xTrainRed, yTrain, xTestRed, 40, neurons[x])
        # Add the trend back into the predictions
        trendedPred = statistics.reapplyTrend(testIndices, pred, slope, intercept)
        # Reverse the logarithmic scaling
        trendedPred = [math.exp(z) for z in trendedPred]
        # Compute the NRMSE
        err = statistics.normRmse(yTest, trendedPred)
        # Append computed predictions to list of classifier predictions
        preds.append(trendedPred)
        print("The NRMSE for the neural network is " + str(err) + "...")
    preds.append(yTest)
    names.append("actual")
    # Return the error and predictions of the final configuration
    return err, trendedPred
    # Unreachable after the return above:
    # visualizer.comparisonPlot(
    #     2014, 1, 1, preds, names,
    #     plotName="Neural Network Load Predictions vs. Actual",
    #     yAxisName="Predicted Kilowatts")
def clustering():
    # Retrieve time series data & apply preprocessing
    data = constructData()
    # 2014 had 365 days, but we take the last 364 days since
    # the last day has no numerical value
    cutoff = len(data[0]) - 364
    xTrain = data[0][0:cutoff]
    yTrain = data[1][0:cutoff]
    xTest = data[0][cutoff:]
    yTest = data[1][cutoff:]
    # Fill in missing values denoted by zeroes as an average of
    # both neighbors
    statistics.estimateMissing(xTrain, 0.0)
    statistics.estimateMissing(xTest, 0.0)
    # Logarithmically scale the data
    xTrain = [[math.log(y) for y in x] for x in xTrain]
    xTest = [[math.log(y) for y in x] for x in xTest]
    yTrain = [math.log(x) for x in yTrain]
    # Detrend the time series
    indices = np.arange(len(data[1]))
    trainIndices = indices[0:cutoff]
    testIndices = indices[cutoff:]
    detrended, slope, intercept = statistics.detrend(trainIndices, yTrain)
    yTrain = detrended
    # Compute centroids and labels of the data under four clusterings
    cward_7, lward_7 = hierarchicalClustering(xTrain, 7)
    cward_365, lward_365 = hierarchicalClustering(xTrain, 365)
    ckmeans_7, lkmeans_7 = kMeansClustering(xTrain, 7)
    ckmeans_365, lkmeans_365 = kMeansClustering(xTrain, 365)
    c = [cward_7, cward_365, ckmeans_7, ckmeans_365]
    l = [lward_7, lward_365, lkmeans_7, lkmeans_365]
    algNames = ["agglomerative(7)", "agglomerative(365)",
                "k-means(7)", "k-means(365)"]
    preds = []
    for t in range(len(c)):
        # The centroids computed by the current clustering algorithm
        centroids = c[t]
        # The labels for the examples defined by the current clustering assignment
        labels = l[t]
        # Separate the training samples into cluster sets
        clusterSets = []
        for x in range(len(centroids)):
            clusterSets.append([])
        for x in range(len(labels)):
            # Place the example into its cluster
            clusterSets[labels[x]].append((xTrain[x], yTrain[x]))
        # Compute predictions for each of the test examples
        pred = predictClustering(centroids, clusterSets, xTest, "euclidean")
        # Add the trend back into the predictions
        trendedPred = statistics.reapplyTrend(testIndices, pred, slope, intercept)
        # Reverse the logarithmic scaling
        trendedPred = [math.exp(x) for x in trendedPred]
        # Compute the NRMSE
        err = statistics.normRmse(yTest, trendedPred)
        # Add to list of predictions
        preds.append(trendedPred)
        print("The Normalized Root-Mean Square Error is " + str(err) +
              " using algorithm " + algNames[t] + "...")
    algNames.append("actual")
    preds.append(yTest)
    visualizer.comparisonPlot(
        2014, 1, 1, preds, algNames,
        plotName="Clustering Load Predictions vs. Actual",
        yAxisName="Predicted Kilowatts")
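# The clustering helpers used above (hierarchicalClustering, kMeansClustering,
# predictClustering) are not defined in this file. The sketch below assumes
# scikit-learn and predicts the mean target of the nearest centroid's cluster;
# the original implementations may differ.
import numpy as np
from sklearn.cluster import AgglomerativeClustering, KMeans

def hierarchicalClustering(xTrain, k):
    # Ward-linkage agglomerative clustering; it has no native centroids, so
    # return the mean of each cluster's members as its centroid.
    X = np.asarray(xTrain)
    labels = AgglomerativeClustering(n_clusters=k, linkage="ward").fit_predict(X)
    centroids = [X[labels == i].mean(axis=0) for i in range(k)]
    return centroids, labels

def kMeansClustering(xTrain, k):
    # Standard k-means; centroids come directly from the fitted model.
    model = KMeans(n_clusters=k)
    labels = model.fit_predict(np.asarray(xTrain))
    return model.cluster_centers_, labels

def predictClustering(centroids, clusterSets, xTest, metric):
    # For each test example, find the nearest centroid (only the Euclidean
    # metric is sketched here) and predict the mean target value of the
    # training examples assigned to that cluster.
    preds = []
    for x in xTest:
        dists = [np.linalg.norm(np.asarray(x) - np.asarray(c)) for c in centroids]
        members = clusterSets[int(np.argmin(dists))]
        preds.append(np.mean([y for (_, y) in members]))
    return preds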
def neuralNetwork():
    # Retrieve time series data & apply preprocessing
    data = constructData()
    # Hold out the last 89 days for testing
    cutoff = len(data[0]) - 89
    xTrain = data[0][0:cutoff]
    yTrain = data[1][0:cutoff]
    xTest = data[0][cutoff:]
    yTest = data[1][cutoff:]
    # Fill in missing values denoted by zeroes as an average of
    # both neighbors
    statistics.estimateMissing(xTrain, 0.0)
    statistics.estimateMissing(xTest, 0.0)
    # Logarithmically scale the data
    xTrain = [[math.log(y) for y in x] for x in xTrain]
    xTest = [[math.log(y) for y in x] for x in xTest]
    yTrain = [math.log(x) for x in yTrain]
    # Detrend the time series
    indices = np.arange(len(data[1]))
    trainIndices = indices[0:cutoff]
    testIndices = indices[cutoff:]
    detrended, slope, intercept = statistics.detrend(trainIndices, yTrain)
    yTrain = detrended
    # Candidate (PCA dimension, hidden-neuron) configurations
    dimensions = [6, 10, 12]
    neurons = [30, 50, 50]
    names = []
    for x in range(len(dimensions)):
        names.append("d=" + str(dimensions[x]) + ",h=" + str(neurons[x]))
    preds = []
    for x in range(len(dimensions)):
        # Perform dimensionality reduction on the feature vectors
        pca = PCA(n_components=dimensions[x])
        pca.fit(xTrain)
        xTrainRed = pca.transform(xTrain)
        xTestRed = pca.transform(xTest)
        pred = fit_predict(xTrainRed, yTrain, xTestRed, 40, neurons[x])
        # Add the trend back into the predictions
        trendedPred = statistics.reapplyTrend(testIndices, pred, slope, intercept)
        # Reverse the logarithmic scaling
        trendedPred = [math.exp(z) for z in trendedPred]
        # Compute the NRMSE
        err = statistics.normRmse(yTest, trendedPred)
        # Append computed predictions to list of classifier predictions
        preds.append(trendedPred)
        print("The NRMSE for the neural network is " + str(err) + "...")
    preds.append(yTest)
    names.append("actual")
    visualizer.comparisonPlot(
        2014, 1, 1, preds, names,
        plotName="Neural Network Load Predictions vs. Actual",
        yAxisName="Predicted Kilowatts")
def neuralNetwork(file, days):
    # Load the uploaded spreadsheet and build feature/target lists
    xData = []
    yData = []
    BASE_DIR = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
    book = xlrd.open_workbook(
        os.path.join(BASE_DIR, "media", "uploadedfile", file))
    sheet = book.sheet_by_index(0)
    for rx in range(1, sheet.nrows):
        row = sheet.row(rx)[1:12]  # features, including temperatures
        rowy = sheet.row(rx)[12]   # total of the next day
        row = [row[x].value for x in range(0, len(row))]
        rowy = rowy.value
        xData.append(row)
        yData.append(rowy)
    # Use a 720-day window; the final `days` observations form the test set
    cu = len(xData) - 720
    cutoff = len(xData) - days
    xTrain = xData[cu:cutoff]
    yTrain = yData[cu:cutoff]
    xTest = xData[cutoff:]
    yTest = yData[cutoff:]
    # Fill in missing values denoted by zeroes as an average of
    # both neighbors
    statistics.estimateMissing(xTrain, 0.0)
    statistics.estimateMissing(xTest, 0.0)
    # Logarithmically scale the data
    xTrain = [[math.log(y) for y in x] for x in xTrain]
    xTest = [[math.log(y) for y in x] for x in xTest]
    yTrain = [math.log(x) for x in yTrain]
    # Detrend the time series
    indices = np.arange(len(xData))
    trainIndices = indices[cu:cutoff]
    testIndices = indices[cutoff:]
    detrended, slope, intercept = statistics.detrend(trainIndices, yTrain)
    yTrain = detrended
    # Candidate (PCA dimension, hidden-neuron) configurations
    dimensions = [7, 8, 10, 11]
    neurons = [300, 500, 500, 500]
    names = []
    for x in range(len(dimensions)):
        names.append("d=" + str(dimensions[x]) + ",h=" + str(neurons[x]))
    preds = []
    trendedPred = []
    accu = []
    mse = []
    for x in range(len(dimensions)):
        # Perform dimensionality reduction on the feature vectors
        pca = PCA(n_components=dimensions[x])
        pca.fit(xTrain)
        xTrainRed = pca.transform(xTrain)
        xTestRed = pca.transform(xTest)
        pred = fit_predict(xTrainRed, yTrain, xTestRed, 100, neurons[x])
        # Add the trend back into the predictions
        temp1 = statistics.reapplyTrend(testIndices, pred, slope, intercept)
        # Reverse the logarithmic scaling
        trendedPred.append([math.exp(z) for z in temp1])
        # Compute the NRMSE and the MAPE-based accuracy
        err = statistics.normRmse(yTest, trendedPred[x])
        err2 = statistics.mape(yTest, trendedPred[x])
        accu.append((1 - err2) * 100)
        mse.append(math.pow(err, 2))
        # Append computed predictions to list of classifier predictions
        preds.append(trendedPred[x])
        print("Error Rate :" + str(err2) + "\n\n")
    # Return the metrics and predictions of the most accurate configuration
    max_val = max(accu)
    index_max = accu.index(max_val)
    return mse[index_max], accu[index_max], trendedPred[index_max], yTest
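# statistics.normRmse and statistics.mape are not shown in this file. The
# sketch below assumes the RMSE is normalized by the range of the actual
# series, which is one common convention; the original may normalize differently.
import math

def normRmse(actual, pred):
    # Root-mean-square error, normalized by the range of the actual values.
    n = len(actual)
    rmse = math.sqrt(sum((actual[i] - pred[i]) ** 2 for i in range(n)) / n)
    return rmse / (max(actual) - min(actual))

def mape(actual, pred):
    # Mean absolute percentage error as a fraction, so (1 - mape) * 100 is
    # the percent accuracy reported above.
    n = len(actual)
    return sum(abs((actual[i] - pred[i]) / actual[i]) for i in range(n)) / n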
def neuralNetwork():
    # Retrieve time series data & apply preprocessing
    xData = []
    yData = []
    book = xlrd.open_workbook("data/data_with_9_variable.xlsx")
    sheet = book.sheet_by_index(0)
    for rx in range(1, sheet.nrows):
        row = sheet.row(rx)[1:12]  # features, including temperatures
        rowy = sheet.row(rx)[12]   # total of the next day
        row = [row[x].value for x in range(0, len(row))]
        rowy = rowy.value
        xData.append(row)
        yData.append(rowy)
    # Use a 720-day window; the final 30 observations form the test set
    cu = len(xData) - 720
    cutoff = len(xData) - 30
    xTrain = xData[cu:cutoff]
    yTrain = yData[cu:cutoff]
    xTest = xData[cutoff:]
    yTest = yData[cutoff:]
    # Fill in missing values denoted by zeroes as an average of
    # both neighbors
    statistics.estimateMissing(xTrain, 0.0)
    statistics.estimateMissing(xTest, 0.0)
    # Logarithmically scale the data
    xTrain = [[math.log(y) for y in x] for x in xTrain]
    xTest = [[math.log(y) for y in x] for x in xTest]
    yTrain = [math.log(x) for x in yTrain]
    # Detrend the time series
    indices = np.arange(len(xData))
    trainIndices = indices[cu:cutoff]
    testIndices = indices[cutoff:]
    detrended, slope, intercept = statistics.detrend(trainIndices, yTrain)
    yTrain = detrended
    # Candidate (PCA dimension, hidden-neuron) configurations
    dimensions = [7, 8, 10, 11]
    neurons = [300, 500, 500, 500]
    names = []
    for x in range(len(dimensions)):
        names.append("d=" + str(dimensions[x]) + ",h=" + str(neurons[x]))
    preds = []
    for x in range(len(dimensions)):
        # Perform dimensionality reduction on the feature vectors
        pca = PCA(n_components=dimensions[x])
        pca.fit(xTrain)
        xTrainRed = pca.transform(xTrain)
        xTestRed = pca.transform(xTest)
        pred = fit_predict(xTrainRed, yTrain, xTestRed, 100, neurons[x])
        # Add the trend back into the predictions
        trendedPred = statistics.reapplyTrend(testIndices, pred, slope, intercept)
        # Reverse the logarithmic scaling
        trendedPred = [math.exp(z) for z in trendedPred]
        # Compute the NRMSE and the MAPE-based accuracy
        err = statistics.normRmse(yTest, trendedPred)
        err2 = statistics.mape(yTest, trendedPred)
        # Append computed predictions to list of classifier predictions
        preds.append(trendedPred)
        print("The NRMSE for the neural network is " + str(err) + "...")
        print("The %Accuracy for the neural network is " +
              str((1 - err2) * 100) + "...\n")
    preds.append(yTest)
    names.append("actual")
    visualizer.comparisonPlot(
        2014, 1, 1, preds, names,
        plotName="Neural Network Load Predictions vs. Actual",
        yAxisName="Predicted Kilowatts")
def neuralNetwork():
    # Retrieve time series data & apply preprocessing
    data = constructData()
    # 2014 had 365 days, but we take the last 364 days since
    # the last day has no numerical value
    cutoff = len(data[0]) - 364
    xTrain = data[0][0:cutoff]
    yTrain = data[1][0:cutoff]
    xTest = data[0][cutoff:]
    yTest = data[1][cutoff:]
    # Fill in missing values denoted by zeroes as an average of
    # both neighbors
    statistics.estimateMissing(xTrain, 0.0)
    statistics.estimateMissing(xTest, 0.0)
    # Logarithmically scale the data
    xTrain = [[math.log(y) for y in x] for x in xTrain]
    xTest = [[math.log(y) for y in x] for x in xTest]
    yTrain = [math.log(x) for x in yTrain]
    # Detrend the time series
    indices = np.arange(len(data[1]))
    trainIndices = indices[0:cutoff]
    testIndices = indices[cutoff:]
    detrended, slope, intercept = statistics.detrend(trainIndices, yTrain)
    yTrain = detrended
    # Candidate (PCA dimension, hidden-neuron) configurations
    dimensions = [6, 10, 12]
    neurons = [30, 50, 50]
    names = []
    for x in range(len(dimensions)):
        names.append("d=" + str(dimensions[x]) + ",h=" + str(neurons[x]))
    preds = []
    for x in range(len(dimensions)):
        # Perform dimensionality reduction on the feature vectors
        pca = PCA(n_components=dimensions[x])
        pca.fit(xTrain)
        xTrainRed = pca.transform(xTrain)
        xTestRed = pca.transform(xTest)
        pred = fit_predict(xTrainRed, yTrain, xTestRed, 40, neurons[x])
        # Add the trend back into the predictions
        trendedPred = statistics.reapplyTrend(testIndices, pred, slope, intercept)
        # Reverse the logarithmic scaling
        trendedPred = [math.exp(z) for z in trendedPred]
        # Compute the NRMSE
        err = statistics.normRmse(yTest, trendedPred)
        # Append computed predictions to list of classifier predictions
        preds.append(trendedPred)
        print("The NRMSE for the neural network is " + str(err) + "...")
    preds.append(yTest)
    names.append("actual")
    visualizer.comparisonPlot(
        2014, 1, 1, preds, names,
        plotName="Neural Network Load Predictions vs. Actual",
        yAxisName="Predicted Kilowatts")
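# fit_predict, the network helper called as fit_predict(xTrain, yTrain, xTest,
# epochs, neurons), is not defined in this file and its original library is
# not shown. The sketch below uses scikit-learn's MLPRegressor as a stand-in,
# with max_iter playing the role of the epoch count; this is an assumption,
# not the original implementation.
from sklearn.neural_network import MLPRegressor

def fit_predict(xTrain, yTrain, xTest, epochs, neurons):
    # Train a single-hidden-layer feedforward network and predict the test set.
    net = MLPRegressor(hidden_layer_sizes=(neurons,), max_iter=epochs,
                       solver="adam", random_state=0)
    net.fit(xTrain, yTrain)
    return net.predict(xTest)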