def test_mreg_interpolation_multi(self):
    """MRegInterpolation must fill a damaged target series even when the
    neighbor series it is given are damaged as well.

    The target series and every neighbor series are damaged with
    ``MARDestroyer`` (``percentage=.50``).  After interpolation,
    ``MissingDataFinder`` must report no remaining gaps in the result.
    """
    park_id = NREL.park_id['tehachapi']
    windpark = NREL().get_windpark(park_id, 3, 2004)
    target = windpark.get_target()
    timestep = 600
    window = slice(300, 350)

    measurements = target.get_measurements()[window]
    damaged, indices = MARDestroyer().destroy(measurements, percentage=.50)

    # All turbines but the last one serve as neighbors
    # (presumably the last entry is the target itself -- TODO confirm).
    neighbors = windpark.get_turbines()[:-1]
    count_neighbors = len(neighbors)
    reg = 'knn'  # KNeighborsRegressor(10, 'uniform')
    regargs = {'n': 10, 'variant': 'uniform'}

    # Per index: a counter that starts at the number of neighbors and is
    # decremented each time a neighbor loses that index.
    missed = {k: count_neighbors for k in indices}
    exclude = []
    damaged_nseries = []

    for neighbor in neighbors:
        nseries = neighbor.get_measurements()[window]
        # Use names distinct from the target's `damaged` / `indices`:
        # re-binding them here would make the interpolation below run on
        # the last neighbor's series instead of the target's.
        neighbor_damaged, neighbor_indices = MARDestroyer().destroy(
            nseries, percentage=.50, exclude=exclude)

        for index in neighbor_indices:
            if index not in missed:
                missed[index] = count_neighbors
            missed[index] -= 1
            if missed[index] == 1:
                # Hand the index to the destroyer via `exclude` for the
                # remaining neighbors (intended to keep it available in
                # at least one series).
                exclude.append(index)
        damaged_nseries.append(neighbor_damaged)

    t_hat = MRegInterpolation().interpolate(
        damaged, timestep=timestep, neighbor_series=damaged_nseries,
        reg=reg, regargs=regargs)

    after_misses = MissingDataFinder().find(t_hat, timestep)
    assert len(after_misses) < 1
def test_mreg_interpolation_multi(self):
    """MRegInterpolation must fill a damaged target series even when the
    neighbor series it is given are damaged as well.

    The target series and every neighbor series are damaged with
    ``MARDestroyer`` (``percentage=.50``).  After interpolation,
    ``MissingDataFinder`` must report no remaining gaps in the result.
    """
    park_id = NREL.park_id['tehachapi']
    windpark = NREL().get_windpark(park_id, 3, 2004)
    target = windpark.get_target()
    timestep = 600
    window = slice(300, 350)

    measurements = target.get_measurements()[window]
    damaged, indices = MARDestroyer().destroy(measurements, percentage=.50)

    # All turbines but the last one serve as neighbors
    # (presumably the last entry is the target itself -- TODO confirm).
    neighbors = windpark.get_turbines()[:-1]
    count_neighbors = len(neighbors)
    reg = 'knn'  # KNeighborsRegressor(10, 'uniform')
    regargs = {'n': 8, 'variant': 'uniform'}

    # Per index: a counter that starts at the number of neighbors and is
    # decremented each time a neighbor loses that index.
    missed = {k: count_neighbors for k in indices}
    exclude = []
    damaged_nseries = []

    for neighbor in neighbors:
        nseries = neighbor.get_measurements()[window]
        # Use names distinct from the target's `damaged` / `indices`:
        # re-binding them here would make the interpolation below run on
        # the last neighbor's series instead of the target's.
        neighbor_damaged, neighbor_indices = MARDestroyer().destroy(
            nseries, percentage=.50, exclude=exclude)

        for index in neighbor_indices:
            if index not in missed:
                missed[index] = count_neighbors
            missed[index] -= 1
            if missed[index] == 1:
                # Hand the index to the destroyer via `exclude` for the
                # remaining neighbors (intended to keep it available in
                # at least one series).
                exclude.append(index)
        damaged_nseries.append(neighbor_damaged)

    t_hat = MRegInterpolation().interpolate(
        damaged, timestep=timestep, neighbor_series=damaged_nseries,
        reg=reg, regargs=regargs)

    after_misses = MissingDataFinder().find(t_hat, timestep)
    assert len(after_misses) < 1
def compute_mse(regressor, horizon):
    """Train a regressor on the 'tehachapi' park and return its test MSE.

    Parameters
    ----------
    regressor : str
        ``'svr'`` for an RBF support vector regressor or ``'knn'`` for a
        10-nearest-neighbors regressor with uniform weights.
    horizon : int
        Forecast horizon, passed through to ``PowerMapping``.

    Returns
    -------
    The mean squared error of the predictions on the test patterns.

    Raises
    ------
    ValueError
        If ``regressor`` is neither ``'svr'`` nor ``'knn'``.
    """
    # Fail fast, before the data is loaded, instead of reaching the return
    # statement with no result bound.
    if regressor not in ('svr', 'knn'):
        raise ValueError(
            "Unknown regressor %r; expected 'svr' or 'knn'." % (regressor,))

    # get wind park and corresponding target.
    windpark = NREL().get_windpark(NREL.park_id['tehachapi'], 3, 2004, 2005)
    target = windpark.get_target()

    # use power mapping for pattern-label mapping.
    feature_window = 3
    mapping = PowerMapping()
    X = mapping.get_features_park(windpark, feature_window, horizon)
    y = mapping.get_labels_turbine(target, feature_window, horizon)

    # first half of the patterns is used for training (roughly the year
    # 2004), the second half for testing (roughly 2005).
    train_to = len(X) // 2
    test_to = len(X)
    # only every 25th pattern is used on both sides.
    train_step, test_step = 25, 25
    X_train = X[:train_to:train_step]
    y_train = y[:train_to:train_step]
    X_test = X[train_to:test_to:test_step]
    y_test = y[train_to:test_to:test_step]

    if regressor == 'svr':
        reg = SVR(kernel='rbf', epsilon=0.1, C=100.0, gamma=0.0001)
    else:
        # keyword arguments: newer scikit-learn releases no longer accept
        # `weights` positionally.
        reg = KNeighborsRegressor(n_neighbors=10, weights='uniform')

    reg.fit(X_train, y_train)
    # documented argument order is (y_true, y_pred); the value is the same.
    return mean_squared_error(y_test, reg.predict(X_test))
def compute_mse(regressor, param):
    """Compare a regressor against the persistence (naive) forecast.

    Loads the 'tehachapi' park, maps it to pattern-label pairs with
    ``PowerMapping`` (feature window 6, horizon 3), trains on every fifth
    pattern of the first half and predicts every fifth pattern of the
    second half.

    Parameters
    ----------
    regressor : str
        ``'rf'`` for a random forest or ``'knn'`` for k-nearest neighbors
        with uniform weights.
    param : int
        Number of trees (``'rf'``) or number of neighbors (``'knn'``).

    Returns
    -------
    (mse_y_hat, mse_naive_hat)
        Mean squared error of the regressor and of the persistence model
        on the same test labels.

    Raises
    ------
    ValueError
        If ``regressor`` is neither ``'rf'`` nor ``'knn'``.
    """
    # Validate before the data is loaded so a typo fails immediately.
    if regressor not in ('rf', 'knn'):
        raise ValueError("No regressor set.")

    # Local import: only individual numpy names are imported at file level.
    import numpy as np

    # get wind park and corresponding target. forecast is for the target
    # turbine
    park_id = NREL.park_id['tehachapi']
    windpark = NREL().get_windpark(park_id, 3, 2004)
    target = windpark.get_target()

    # use power mapping for pattern-label mapping. Feature window length
    # is 6 time steps and time horizon (forecast) is 3 time steps.
    feature_window = 6
    horizon = 3
    mapping = PowerMapping()
    X = mapping.get_features_park(windpark, feature_window, horizon)
    Y = mapping.get_labels_turbine(target, feature_window, horizon)

    # train on the first half of the patterns, test on the second half.
    train_to = len(X) // 2
    test_to = len(X)

    # train and test only every fifth pattern, for performance.
    train_step, test_step = 5, 5

    if regressor == 'rf':
        # Squared error is the default split criterion; leaving it
        # implicit works across scikit-learn versions, whereas the
        # spelling 'mse' is rejected by recent releases.
        reg = RandomForestRegressor(n_estimators=param)
    else:
        # keyword arguments: newer scikit-learn releases no longer accept
        # `weights` positionally.
        reg = KNeighborsRegressor(n_neighbors=param, weights='uniform')

    # fitting the pattern-label pairs
    reg.fit(X[0:train_to:train_step], Y[0:train_to:train_step])
    y_hat = np.asarray(reg.predict(X[train_to:test_to:test_step]),
                       dtype=np.float64)

    # Positions in Y of the labels belonging to the predictions.
    labels = np.asarray(Y, dtype=np.float64)
    test_idx = train_to + test_step * np.arange(len(y_hat))
    y_true = labels[test_idx]

    # naive is also known as persistence model: its forecast is the label
    # `horizon` time steps before the one being predicted.
    naive_hat = labels[test_idx - horizon]

    # mean squared errors of the regressor and of the naive prediction.
    mse_y_hat = np.mean((y_hat - y_true) ** 2)
    mse_naive_hat = np.mean((naive_hat - y_true) ** 2)

    return mse_y_hat, mse_naive_hat
def compute_mse(regressor, param):
    """Compare a regressor against the persistence (naive) forecast.

    Loads the 'tehachapi' park, maps it to pattern-label pairs with
    ``PowerMapping`` (feature window 6, horizon 3), trains on every fifth
    pattern of the first half and predicts every fifth pattern of the
    second half.

    Parameters
    ----------
    regressor : str
        ``'rf'`` for a random forest or ``'knn'`` for k-nearest neighbors
        with uniform weights.
    param : int
        Number of trees (``'rf'``) or number of neighbors (``'knn'``).

    Returns
    -------
    (mse_y_hat, mse_naive_hat)
        Mean squared error of the regressor and of the persistence model
        on the same test labels.

    Raises
    ------
    ValueError
        If ``regressor`` is neither ``'rf'`` nor ``'knn'``.
    """
    # Validate before the data is loaded so a typo fails immediately.
    if regressor not in ('rf', 'knn'):
        raise ValueError("No regressor set.")

    # Local import: only individual numpy names are imported at file level.
    import numpy as np

    # get wind park and corresponding target. forecast is for the target
    # turbine
    park_id = NREL.park_id['tehachapi']
    windpark = NREL().get_windpark(park_id, 3, 2004)
    target = windpark.get_target()

    # use power mapping for pattern-label mapping. Feature window length
    # is 6 time steps and time horizon (forecast) is 3 time steps.
    feature_window = 6
    horizon = 3
    mapping = PowerMapping()
    X = mapping.get_features_park(windpark, feature_window, horizon)
    Y = mapping.get_labels_turbine(target, feature_window, horizon)

    # train on the first half of the patterns, test on the second half.
    train_to = len(X) // 2
    test_to = len(X)

    # train and test only every fifth pattern, for performance.
    train_step, test_step = 5, 5

    if regressor == 'rf':
        # Squared error is the default split criterion; leaving it
        # implicit works across scikit-learn versions, whereas the
        # spelling 'mse' is rejected by recent releases.
        reg = RandomForestRegressor(n_estimators=param)
    else:
        # keyword arguments: newer scikit-learn releases no longer accept
        # `weights` positionally.
        reg = KNeighborsRegressor(n_neighbors=param, weights='uniform')

    # fitting the pattern-label pairs
    reg.fit(X[0:train_to:train_step], Y[0:train_to:train_step])
    y_hat = np.asarray(reg.predict(X[train_to:test_to:test_step]),
                       dtype=np.float64)

    # Positions in Y of the labels belonging to the predictions.
    labels = np.asarray(Y, dtype=np.float64)
    test_idx = train_to + test_step * np.arange(len(y_hat))
    y_true = labels[test_idx]

    # naive is also known as persistence model: its forecast is the label
    # `horizon` time steps before the one being predicted.
    naive_hat = labels[test_idx - horizon]

    # mean squared errors of the regressor and of the naive prediction.
    mse_y_hat = np.mean((y_hat - y_true) ** 2)
    mse_naive_hat = np.mean((naive_hat - y_true) ** 2)

    return mse_y_hat, mse_naive_hat
def test_backward_copy_interpolation(self):
    """BackwardCopy must restore a damaged series to its original length
    and leave no gaps for MissingDataFinder to report.
    """
    park_id = NREL.park_id['tehachapi']
    windpark = NREL().get_windpark(park_id, 10, 2004)
    target = windpark.get_target()
    timestep = 600
    measurements = target.get_measurements()[300:500]

    damaged, _ = MARDestroyer().destroy(measurements, percentage=.50)

    # Interpolate the *damaged* series.  Feeding the intact measurements
    # would make both assertions below hold without the interpolation
    # having filled anything.
    t_hat = BackwardCopy().interpolate(damaged, timestep=timestep)
    after_misses = MissingDataFinder().find(t_hat, timestep)

    assert measurements.shape[0] == t_hat.shape[0]
    assert len(after_misses) < 1
def test_mreg_interpolation(self):
    """MRegInterpolation with a 'knn' regressor must leave no gaps in a
    target series damaged by MARDestroyer, given intact neighbor series.
    """
    park_id = NREL.park_id['tehachapi']
    farm = NREL().get_windpark(park_id, 3, 2004)
    window = slice(300, 500)
    step = 600

    original = farm.get_target().get_measurements()[window]
    broken, _removed = MARDestroyer().destroy(original, percentage=.50)
    # Gap report of the damaged input; computed but not asserted on.
    _gaps_before = MissingDataFinder().find(broken, step)

    # Every turbine except the last one contributes a neighbor series.
    neighbor_series = []
    for turbine in farm.get_turbines()[:-1]:
        neighbor_series.append(turbine.get_measurements()[window])

    filled = MRegInterpolation().interpolate(
        broken,
        timestep=step,
        neighbor_series=neighbor_series,
        reg='knn',  # KNeighborsRegressor(10, 'uniform')
        regargs={'n': 10, 'variant': 'uniform'})

    gaps_after = MissingDataFinder().find(filled, step)
    assert len(gaps_after) < 1
def test_mreg_interpolation(self):
    """MRegInterpolation with a 'knn' regressor must leave no gaps in a
    target series damaged by MARDestroyer, given intact neighbor series.
    """
    park_id = NREL.park_id['tehachapi']
    farm = NREL().get_windpark(park_id, 3, 2004)
    window = slice(300, 500)
    step = 600

    original = farm.get_target().get_measurements()[window]
    broken, _removed = MARDestroyer().destroy(original, percentage=.50)
    # Gap report of the damaged input; computed but not asserted on.
    _gaps_before = MissingDataFinder().find(broken, step)

    # Every turbine except the last one contributes a neighbor series.
    neighbor_series = []
    for turbine in farm.get_turbines()[:-1]:
        neighbor_series.append(turbine.get_measurements()[window])

    filled = MRegInterpolation().interpolate(
        broken,
        timestep=step,
        neighbor_series=neighbor_series,
        reg='knn',  # KNeighborsRegressor(10, 'uniform')
        regargs={'n': 8, 'variant': 'uniform'})

    gaps_after = MissingDataFinder().find(filled, step)
    assert len(gaps_after) < 1
def test_topological_interpolation(self):
    """TopologicInterpolation must restore a series damaged by
    NMARDestroyer to its original length without leaving gaps.
    """
    park_id = NREL.park_id['tehachapi']
    farm = NREL().get_windpark(park_id, 10, 2004)
    target = farm.get_target()
    window = slice(300, 500)
    step = 600

    original = target.get_measurements()[window]
    broken, _removed = NMARDestroyer().destroy(
        original, percentage=.80, min_length=10, max_length=100)

    target_location = (target.longitude, target.latitude)

    # Every turbine except the last one is used as a neighbor; series and
    # locations are collected in the same turbine order.
    others = farm.get_turbines()[:-1]
    series = []
    for turbine in others:
        series.append(turbine.get_measurements()[window])
    locations = []
    for turbine in others:
        locations.append((turbine.longitude, turbine.latitude))

    filled = TopologicInterpolation().interpolate(
        broken,
        method="topologic",
        timestep=step,
        location=target_location,
        neighbor_series=series,
        neighbor_locations=locations)

    gaps = MissingDataFinder().find(filled, step)
    assert original.shape[0] == filled.shape[0]
    assert len(gaps) < 1
# Stefan Oehmcke <*****@*****.**> # License: BSD 3 clause from __future__ import print_function import math import matplotlib.pyplot as plt from numpy import zeros, float32 from windml.datasets.nrel import NREL from windml.mapping.power_mapping import PowerMapping from sklearn.neighbors import KNeighborsRegressor from sklearn.metrics import mean_squared_error # get windpark and corresponding target. forecast is for the target turbine park_id = NREL.park_id['tehachapi'] windpark = NREL().get_windpark(park_id, 3, 2004, 2005) target = windpark.get_target() # use power mapping for pattern-label mapping. feature_window, horizon = 3, 3 mapping = PowerMapping() X = mapping.get_features_park(windpark, feature_window, horizon) y = mapping.get_labels_turbine(target, feature_window, horizon) # train roughly for the year 2004, test roughly for the year 2005. train_to, test_to = int(math.floor(len(X) * 0.5)), len(X) # train and test only every fifth pattern, for performance. train_step, test_step = 5, 5 X_train = X[:train_to:train_step] y_train = y[:train_to:train_step] X_test = X[train_to:test_to:test_step] y_test = y[train_to:test_to:test_step]