def strom_real(self):
    t0 = time()
    path = os.path.join(BASE_DIR, "server", "forecasting", "devices",
                        "data", "Electricity_2012Neuendorf.csv")
    raw_dataset1 = DataLoader.load_from_file(
        path, "Strom - Verbrauchertotal (Aktuell)", "\t")
    dates1 = DataLoader.load_from_file(path, "Datum", "\t")
    path2 = os.path.join(BASE_DIR, "server", "forecasting", "devices",
                         "data", "Electricity_2013.csv")
    raw_dataset2 = DataLoader.load_from_file(
        path2, "Strom - Verbrauchertotal (Aktuell)", "\t")
    dates2 = DataLoader.load_from_file(path2, "Datum", "\t")
    t1 = time()

    # cast to float, convert to kW and resample the 10-minute data to hourly
    dates1 = StatisticalForecast.make_hourly([int(d) for d in dates1], 6)
    dates2 = StatisticalForecast.make_hourly([int(d) for d in dates2], 6)
    demand1 = StatisticalForecast.make_hourly(
        [float(val) / 1000.0 for val in raw_dataset1], 6)
    demand2 = StatisticalForecast.make_hourly(
        [float(val) / 1000.0 for val in raw_dataset2], 6)
    t2 = time()

    # MASE of using the 2012 measurements directly as a forecast for 2013
    rm = StatisticalForecast.MASE(demand1, demand1[:len(demand2)], demand2)
    t3 = time()
    print "loading:", t1 - t0, "resampling:", t2 - t1, "MASE:", t3 - t2
    print rm
def error_arrays(self):
    path = os.path.join(BASE_DIR, "server", "forecasting", "devices",
                        "data", "Electricity_2013-6.2014Reger.csv")
    raw_dataset1 = DataLoader.load_from_file(path, "Energie DG Leistung", "\t")
    raw_dataset2 = DataLoader.load_from_file(path, "Energie EG Leistung", "\t")
    dates = DataLoader.load_from_file(path, "Datum", "\t")

    # convert to kW, clamp obvious outliers, then sum both floors
    transf = lambda v: min(float(v) / 1000.0, 500)
    demand = [transf(v1) + transf(v2)
              for v1, v2 in zip(raw_dataset1, raw_dataset2)]
    dates = StatisticalForecast.make_hourly([int(d) for d in dates], 6)
    demand = StatisticalForecast.make_hourly(demand, 6)

    start = calendar.timegm(datetime(year=2013, month=2, day=15).timetuple())
    end = calendar.timegm(datetime(year=2013, month=8, day=15).timetuple())
    fc_length = 7 * 24 * 2  # two-week forecast horizon, in hours

    # accumulated [rmse, mase] per forecast horizon of 1..14 days
    period_errors = [[0, 0] for i in range(14)]
    for timestamp in range(start, end, 24 * 3600):
        print "day:", datetime.fromtimestamp(timestamp)
        start_index = approximate_index(dates, timestamp)
        trainingdata = demand[:start_index]
        testdata = demand[start_index:start_index + fc_length]
        try:
            self.one_forecast(trainingdata, testdata, timestamp,
                              timestamp + fc_length * 3600,
                              period_errors=period_errors)
        except Exception:
            print "forecast failed:", sys.exc_info()[0]
            break

    # average the accumulated errors over all evaluated days
    num_days = len(range(start, end, 24 * 3600))
    period_errors = [[r / num_days, m / num_days] for r, m in period_errors]
    self.export_csv(datasets=[("period_errors_rmse", zip(*period_errors)[0]),
                              ("period_errors_mase", zip(*period_errors)[1])],
                    name="eval_dshw.csv")
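# `approximate_index` is used by error_arrays above and handle_single_data
# below to map a unix timestamp to a position in the date column. A minimal
# sketch of what it presumably does (assumption: binary search for the
# closest entry in a sorted list; the real implementation may differ):
import bisect

def approximate_index_sketch(dates, timestamp):
    # find the insertion point, then pick the closer neighbour
    i = bisect.bisect_left(dates, timestamp)
    if i == 0:
        return 0
    if i >= len(dates):
        return len(dates) - 1
    return i if dates[i] - timestamp < timestamp - dates[i - 1] else i - 1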
def handle_single_data(self):
    path = os.path.join(BASE_DIR, "server", "forecasting", "devices",
                        "data", "Electricity_1.1-12.6.2014.csv")
    raw_dataset = DataLoader.load_from_file(
        path, "Strom - Verbrauchertotal (Aktuell)", "\t")
    # note: the date column is kept at its raw 10-minute resolution
    dates = [int(d) for d in DataLoader.load_from_file(path, "Datum", "\t")]
    demand = StatisticalForecast.make_hourly(
        [float(val) / 1000.0 for val in raw_dataset], 6)

    start = calendar.timegm(datetime(year=2014, month=1, day=2).timetuple())
    start_index = approximate_index(dates, start)
    train_len = 24 * 7 * 8  # eight weeks of hourly training data
    trainingdata = demand[start_index:start_index + train_len]
    test_start = start_index + train_len
    testdata = demand[test_start:test_start + 7 * 24 * 2]
    start_forecast = test_start * 3600  # interpret the hourly index as seconds
    end_forecast = start_forecast + len(testdata) * 3600

    electrical_forecast = DSHWForecast(
        BaseEnvironment(start_forecast, False, False), trainingdata,
        samples_per_hour=1)
    forecast = [electrical_forecast.get_forecast_at(timestamp)
                for timestamp in range(start_forecast, end_forecast, 3600)]
    plot_dataset({"measured": testdata, "forecasted": forecast})
def test_dshw_forecast(self):
    hourly_data = StatisticalForecast.make_hourly(self.dataset, 6)
    env = BaseEnvironment()
    fc = DSHWForecast(env, hourly_data, try_cache=False)
    self.assertTrue(
        len(fc.demands[0]) >= fc.input_hours,
        "the series only contains " + str(len(fc.demands[0])) +
        " hours, not the required " + str(fc.input_hours))
def setUp(self):
    # dataset containing one year of data, sampled in 10-minute intervals.
    # The cache must be reset, because other devices could have added
    # unwanted data.
    DataLoader.cached_csv = {}
    path = DATA_PATH + sep + "demo_electricity_2013.csv"
    raw_dataset = DataLoader.load_from_file(
        path, "Strom - Verbrauchertotal (Aktuell)", "\t")
    # cast to float, convert to kW and resample to hourly values
    self.dataset = StatisticalForecast.make_hourly(
        [float(val) / 1000.0 for val in raw_dataset], 6)
def test_make_hourly(self):
    hourly_data = StatisticalForecast.make_hourly(self.dataset, 6)
    average = 0
    for i in range(6):
        average += self.dataset[i]
    average /= 6
    self.assertEqual(
        hourly_data[0], average,
        "calculated average not the same as function average")
    self.assertAlmostEqual(
        len(hourly_data), 24 * 365, delta=23,
        msg="data for " + str(len(hourly_data) / 24) + " days")
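# The averaging behaviour that test_make_hourly verifies can be sketched as
# follows (assumption: make_hourly averages each consecutive group of
# `samples_per_hour` values; `make_hourly_sketch` is an illustrative name,
# not the project's implementation):
def make_hourly_sketch(data, samples_per_hour):
    hourly = []
    for i in range(0, len(data) - samples_per_hour + 1, samples_per_hour):
        group = data[i:i + samples_per_hour]
        hourly.append(sum(group) / float(len(group)))
    return hourly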
def one_forecast(self, trainingdata, testdata, start_forecast, end_forecast,
                 period_errors):
    # double seasonal Holt-Winters forecast without daytype splitting
    forecast_nodaysplit, (alpha, beta, gamma, delta, autocorr), insample = \
        double_seasonal(trainingdata, 24, 24 * 7, len(testdata),
                        optimization_type="RMSE")
    # accumulate the errors for forecast horizons of 1 to 14 days
    # (upper bound is 14 * 24 + 1 so that the 14th day is included)
    for day, length in enumerate(range(24, 14 * 24 + 1, 24)):
        period_errors[day][0] += self.rmse(
            forecast_nodaysplit[:length], testdata[:length])
        period_errors[day][1] += StatisticalForecast.MASE(
            trainingdata, forecast_nodaysplit[:length], testdata[:length])
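# For reference, minimal sketches of the two error measures accumulated above
# (assumptions: self.rmse is a plain root-mean-square error, and
# StatisticalForecast.MASE follows the standard mean absolute scaled error
# with a one-step naive forecast as the scaling baseline; both helper names
# below are illustrative):
from math import sqrt

def rmse_sketch(forecast, actual):
    pairs = list(zip(forecast, actual))
    return sqrt(sum((f - a) ** 2 for f, a in pairs) / float(len(pairs)))

def mase_sketch(training, forecast, actual):
    # scale by the in-sample mean absolute error of the naive forecast
    naive_mae = sum(abs(training[i] - training[i - 1])
                    for i in range(1, len(training))) / float(len(training) - 1)
    mae = sum(abs(f - a) for f, a in zip(forecast, actual)) / float(len(actual))
    return mae / naive_mae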
def test_split_week_data(self):
    hourly_data = StatisticalForecast.make_hourly(self.dataset, 6)
    env = BaseEnvironment()
    fc = DayTypeForecast(env, hourly_data, try_cache=False)
    self.assertTrue(
        len(fc.demands) == 7, "week_split does not contain 7 series")
    self.assertTrue(
        len(fc.demands[0]) / 24 >= fc.input_weeks,
        "the day series only contains " + str(len(fc.demands[0]) / 24) +
        " days, not " + str(fc.input_weeks))
    for i in range(7):
        rmse = self.rmse(self.dataset_2014[:len(fc.forecasted_demands[i])],
                         fc.forecasted_demands[i])
        self.assertTrue(
            rmse < 30.0,
            "RMSE of " + str(rmse) + " for day " + str(i) + " is way too high")
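# DayTypeForecast.split_weekdata, exercised by the test above, routes an
# hourly series into seven per-weekday series. A minimal sketch of that
# behaviour (an assumption based on the assertions here and in
# test_append_data; the illustrative helper takes the same arguments as the
# real method):
from datetime import timedelta

def split_weekdata_sketch(data, samples_per_hour, start_date):
    series = [[] for _ in range(7)]
    for i, value in enumerate(data):
        weekday = (start_date + timedelta(hours=i // samples_per_hour)).weekday()
        series[weekday].append(value)
    return series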
def __init__(self, device_id, env):
    super(SimulatedElectricalConsumer, self).__init__(device_id, env)
    # TODO: this will have to be replaced by a database
    global electrical_forecast
    if electrical_forecast is None and not env.is_demo_simulation():
        raw_dataset = self.get_data_until(self.env.now)
        # cast to float and convert to kW
        dataset = [float(val) / 1000.0 for val in raw_dataset]
        hourly_data = StatisticalForecast.make_hourly(dataset, 6)
        electrical_forecast = DSHWForecast(
            self.env, hourly_data, samples_per_hour=1)
    self.electrical_forecast = electrical_forecast
    self.new_data_interval = 24 * 60 * 60  # append data each day
    # cache the forecast for better performance
    self.last_forecast_update = self.env.now
    self.start_timestamp = self.env.initial_date
    global all_data
    if all_data is None:
        all_data = self.get_all_data2014()
def test_append_data(self):
    self.setup_forecast()
    path = DATA_PATH + sep + "demo_electricity_2014.csv"
    raw_dataset_2014 = DataLoader.load_from_file(
        path, "Strom - Verbrauchertotal (Aktuell)", "\t")
    # cast to float and convert to kW
    dataset_2014 = StatisticalForecast.make_hourly(
        [float(val) / 1000.0 for val in raw_dataset_2014], 6)
    start = datetime(year=2014, month=1, day=1)
    split_demands14 = DayTypeForecast.split_weekdata(dataset_2014, 1, start)
    self.forecast.append_values(dataset_2014, start)
    four_weeks = 24 * 4  # four occurrences of one weekday, 24 hours each
    # check that series on the same weekdays are equal
    self.assertSequenceEqual(
        self.forecast.demands[start.weekday()][-four_weeks:],
        split_demands14[start.weekday()][-four_weeks:])
    self.assertSequenceEqual(self.forecast.demands[3][-four_weeks:],
                             split_demands14[3][-four_weeks:])
def value_changer():
    try:
        from matplotlib.widgets import Slider, Button, RadioButtons
        from pylab import axes
    except ImportError:
        print "matplotlib is required for the interactive plot"
        return

    path = os.path.join(BASE_DIR, "server", "forecasting", "simulation",
                        "demodata", "demo_electricity_2014.csv")
    raw_data = DataLoader.load_from_file(
        path, "Strom - Verbrauchertotal (Aktuell)", delim="\t")
    # cast to float and convert to kW
    kW_data = StatisticalForecast.make_hourly(
        [float(val) / 1000.0 for val in raw_data], 6)
    dates = [int(d) for d in DataLoader.load_from_file(path, "Datum", "\t")]

    start = calendar.timegm(datetime(year=2014, month=1, day=2).timetuple())
    start_index = approximate_index(dates, start)
    train_len = 24 * 7 * 8
    trainingdata = kW_data[start_index:start_index + train_len]
    test_start = start_index + train_len
    testdata = kW_data[test_start:test_start + 7 * 24 * 2]

    # initial smoothing parameters
    alpha = 0.0000001
    beta = 0.0
    gamma = 0.05
    delta = 0.01
    autocorr = 0.01
    m = 24       # intraday seasonality
    m2 = 24 * 7  # intraweek seasonality
    fc = int(len(testdata))  # forecast length
    forecast_values, params, insample = double_seasonal(
        trainingdata, m, m2, fc, alpha, beta, gamma, delta, autocorr)
    values = {'forecasting': forecast_values, 'measured': testdata}
    (fig, sim_plot, forecast_plot) = plot_dataset(values, 0, block=False)

    axcolor = 'lightgoldenrodyellow'
    axalpa = axes([0.25, 0.02, 0.65, 0.02], axisbg=axcolor)
    axautocorr = axes([0.25, 0.05, 0.65, 0.02], axisbg=axcolor)
    axgamma = axes([0.25, 0.08, 0.65, 0.02], axisbg=axcolor)
    axdelta = axes([0.25, 0.11, 0.65, 0.02], axisbg=axcolor)

    alpha_slider = Slider(axalpa, 'Alpha', 0.0, 1.0, valinit=alpha)
    gamma_slider = Slider(axgamma, 'Gamma', 0.0, 1.0, valinit=gamma)
    delta_slider = Slider(axdelta, 'Delta', 0.0, 1.0, valinit=delta)
    autocorr_slider = Slider(axautocorr, 'Autocorrelation', 0.0, 1.0,
                             valinit=autocorr)

    def update_hw(val):
        # recompute the forecast with the current slider values
        alpha = alpha_slider.val
        autocorr = autocorr_slider.val
        beta = 0.0
        gamma = gamma_slider.val
        delta = delta_slider.val
        forecast_values, params, insample = double_seasonal(
            trainingdata, m, m2, fc, alpha, beta, gamma, delta, autocorr)
        forecast_plot.set_ydata(forecast_values)
        sim_plot.set_ydata(testdata)
        fig.canvas.draw_idle()
        print alpha, autocorr, gamma, MSE(testdata, forecast_values)

    alpha_slider.on_changed(update_hw)
    autocorr_slider.on_changed(update_hw)
    gamma_slider.on_changed(update_hw)
    delta_slider.on_changed(update_hw)
    plt.show()
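# A non-interactive variant of the slider tuning above: evaluate a small grid
# of smoothing parameters and keep the combination with the lowest MSE. The
# grid values are illustrative; double_seasonal and MSE are called with the
# same signatures used in value_changer:
def grid_search_dshw(trainingdata, testdata, m=24, m2=24 * 7):
    best = None
    for alpha in (1e-7, 0.05, 0.1):
        for gamma in (0.01, 0.05, 0.1):
            for delta in (0.01, 0.05):
                forecast, params, insample = double_seasonal(
                    trainingdata, m, m2, len(testdata),
                    alpha, 0.0, gamma, delta, 0.01)
                err = MSE(testdata, forecast)
                if best is None or err < best[0]:
                    best = (err, alpha, gamma, delta)
    return best  # (mse, alpha, gamma, delta)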
def setup_forecast(self):
    hourly_data = StatisticalForecast.make_hourly(self.dataset, 6)
    self.env = BaseEnvironment()
    self.forecast = DayTypeForecast(
        self.env, hourly_data, 1, None, (0.0000000, 0.0, 1.0))