def create_test_pipeline(modules):
    """Assemble a test pipeline that switches between two regressors.

    ``modules`` is unpacked into exactly two callables, in this order: the
    SVR regressor and the linear regressor. Both are attached in transform
    mode to the pipeline inputs ``ClockShift`` and ``ClockShift_1``; the SVR
    step carries the condition ``not is_daytime(x, y)`` and the linear step
    the condition ``is_daytime(x, y)``. An ``RmseCalculator`` step scores
    both outputs against ``load_power_statistics``.

    Returns the assembled ``Pipeline``.
    """
    svr_module, lin_reg_module = modules

    # The pipeline works on batches of one hour.
    pipeline = Pipeline("../results/test_pipeline", batch=pd.Timedelta("1h"))

    # SVR branch: meant to be used whenever it is not daytime.
    svr_prediction = svr_module(
        ClockShift=pipeline["ClockShift"],
        ClockShift_1=pipeline["ClockShift_1"],
        condition=lambda x, y: not is_daytime(x, y),
        computation_mode=ComputationMode.Transform,
        callbacks=[LinePlotCallback('SVR')],
    )

    # Linear-regression branch: meant to be used whenever it is daytime.
    lin_reg_prediction = lin_reg_module(
        ClockShift=pipeline["ClockShift"],
        ClockShift_1=pipeline["ClockShift_1"],
        condition=lambda x, y: is_daytime(x, y),
        computation_mode=ComputationMode.Transform,
        callbacks=[LinePlotCallback('LinearRegression')],
    )

    # Root mean squared error (RMSE) of both predictions against the true
    # values; the step gets a line plot callback and a CSV callback.
    rmse_module = RmseCalculator()
    rmse_module(
        y_hat=(svr_prediction, lin_reg_prediction),
        y=pipeline["load_power_statistics"],
        callbacks=[LinePlotCallback('RMSE'), CSVCallback('RMSE')],
    )

    return pipeline
def pipe(params):
    """Build, train and test a Keras pipeline for one parameter set.

    The Keras model is obtained from ``get_keras_model(params)``. The
    pipeline imputes ``load_power_statistics``, scales it with a
    ``StandardScaler``, derives two lagged copies (lag 1 and lag 2), feeds
    them into the Keras model with the scaled series as target, inverts the
    scaling on the prediction and scores it with an ``RmseCalculator``.

    The names ``train``, ``test`` and ``start`` are not defined in this
    function; they are read from the enclosing scope.

    Returns a dict with the keys ``"loss"`` (the ``RmseCalculator`` test
    result as float), ``"status"`` (``STATUS_OK``) and ``"eval_time"``
    (``time.time() - start``).
    """
    model = get_keras_model(params)
    pipeline = Pipeline(path="../results")

    # Fill gaps in the raw series along the time dimension.
    imputer = LinearInterpolater(method='nearest', dim='time', name='imputer_power')
    imputed_power = imputer(x=pipeline['load_power_statistics'])

    # The same scaler module is reused below for the inverse transform.
    power_scaler = SKLearnWrapper(module=StandardScaler(), name='scaler_power')
    scaled_power = power_scaler(x=imputed_power)

    # Lagged versions of the scaled series serve as model inputs.
    lag_one = ClockShift(lag=1, name='ClockShift_Lag1')(x=scaled_power)
    lag_two = ClockShift(lag=2, name='ClockShift_Lag2')(x=scaled_power)

    keras_module = KerasWrapper(
        model,
        fit_kwargs={'batch_size': 32, 'epochs': 100, 'verbose': 0},
        compile_kwargs={'loss': 'mse', 'optimizer': 'Adam', 'metrics': ['mse']},
    )
    keras_prediction = keras_module(
        ClockShift_Lag1=lag_one,
        ClockShift_Lag2=lag_two,
        target=scaled_power,
    )

    # Bring the prediction back to the unscaled range.
    rescaled_prediction = power_scaler(
        x=keras_prediction,
        computation_mode=ComputationMode.Transform,
        use_inverse_transform=True,
        callbacks=[LinePlotCallback('prediction')],
    )

    # Calling the module registers the RMSE step; the returned handle is
    # not needed afterwards.
    rmse_module = RmseCalculator()
    rmse_module(
        keras_model=rescaled_prediction,
        y=pipeline['load_power_statistics'],
        callbacks=[CSVCallback('RMSE')],
    )

    pipeline.train(train)
    result = pipeline.test(test)

    loss = float(result['RmseCalculator'].values)
    return {
        "loss": loss,
        "status": STATUS_OK,
        "eval_time": time.time() - start,
    }
def setUp(self) -> None:
    """Create a fresh ``RmseCalculator`` and store it on ``self.rmse_calculator``."""
    calculator = RmseCalculator()
    self.rmse_calculator = calculator
class TestRMSECalculator(unittest.TestCase):
    """Unit tests for ``RmseCalculator``: parameter handling and ``transform``."""

    def setUp(self) -> None:
        """Give every test its own ``RmseCalculator`` instance."""
        self.rmse_calculator = RmseCalculator()

    def tearDown(self) -> None:
        """Drop the reference to the calculator after each test."""
        self.rmse_calculator = None

    @staticmethod
    def _make_test_data():
        """Return ``(timestamps, dataset)`` shared by the transform tests.

        The dataset holds five hourly samples with one ground-truth column
        (``testCol``) and two prediction columns (``predictCol1``,
        ``predictCol2``) along the ``time`` dimension.
        """
        timestamps = pd.to_datetime(['2015-06-03 00:00:00',
                                     '2015-06-03 01:00:00',
                                     '2015-06-03 02:00:00',
                                     '2015-06-03 03:00:00',
                                     '2015-06-03 04:00:00'])
        dataset = xr.Dataset({
            "testCol": ("time", xr.DataArray([-2, -1, 0, 1, 2]).data),
            "predictCol1": ("time", xr.DataArray([2, -3, 3, 1, -2]).data),
            "predictCol2": ("time", xr.DataArray([4, 4, 3, -2, 1]).data),
            "time": timestamps,
        })
        return timestamps, dataset

    def test_get_params(self):
        """A freshly created calculator reports its default parameters."""
        self.assertEqual(self.rmse_calculator.get_params(),
                         {'offset': 0, 'rolling': False, 'window': 24})

    def test_set_params(self):
        """Parameters passed to ``set_params`` are returned by ``get_params``."""
        self.rmse_calculator.set_params(offset=24, rolling=True, window=2)
        self.assertEqual(self.rmse_calculator.get_params(),
                         {'offset': 24, 'rolling': True, 'window': 2})

    def test_transform_rolling(self):
        """With ``rolling=True`` and ``window=2`` one RMSE row per time step is expected."""
        self.rmse_calculator.set_params(rolling=True, window=2)
        timestamps, test_data = self._make_test_data()

        test_result = self.rmse_calculator.transform(y=test_data['testCol'],
                                                     gt=test_data['testCol'],
                                                     pred1=test_data['predictCol1'],
                                                     pred2=test_data['predictCol2'])

        # The first row is NaN: only one sample is available for a window of two.
        expected_result = xr.DataArray(
            np.array([[np.nan, np.nan, np.nan],
                      [0.0, np.sqrt(10), np.sqrt(30.5)],
                      [0.0, np.sqrt(6.5), np.sqrt(17)],
                      [0.0, np.sqrt(4.5), 3],
                      [0.0, np.sqrt(8), np.sqrt(5)]]),
            coords={"time": timestamps, "predictions": ["gt", "pred1", "pred2"]},
            dims=["time", "predictions"])

        xr.testing.assert_allclose(test_result, expected_result)

    def test_transform(self):
        """Without rolling, a single RMSE row stamped with the last time step is expected."""
        # NOTE(review): no arguments are passed, so this presumably leaves the
        # defaults untouched - confirm against RmseCalculator.set_params.
        self.rmse_calculator.set_params()
        _, test_data = self._make_test_data()
        result_time = pd.to_datetime(['2015-06-03 04:00:00'])

        test_result = self.rmse_calculator.transform(y=test_data['testCol'],
                                                     gt=test_data['testCol'],
                                                     pred1=test_data['predictCol1'],
                                                     pred2=test_data['predictCol2'])

        expected_result = xr.DataArray(
            np.array([[0.0, 3.0, 4.0]]),
            coords={"time": result_time, "predictions": ["gt", "pred1", "pred2"]},
            dims=["time", "predictions"])

        # The RMSE values are computed floats, so compare with a tolerance
        # (as test_transform_rolling does) instead of demanding bit-exact
        # equality.
        xr.testing.assert_allclose(test_result, expected_result)

    def test_transform_without_predictions(self):
        """Calling ``transform`` with only ``y`` must raise ``InputNotAvailable``."""
        # NOTE(review): no arguments are passed, so this presumably leaves the
        # defaults untouched - confirm against RmseCalculator.set_params.
        self.rmse_calculator.set_params()
        _, test_data = self._make_test_data()

        with pytest.raises(InputNotAvailable) as e_info:
            self.rmse_calculator.transform(y=test_data['testCol'])

        # Checked after the ``with`` block: statements following the raising
        # call inside the block would never run.
        self.assertEqual(e_info.value.message,
                         "No predictions are provided as input for the RMSE Calculator. You should add the predictions "
                         "by a seperate key word arguments if you add the RMSECalculator to the pipeline.")
calendar=calendar, target=scale_power_statistics, callbacks=[LinePlotCallback('linear_regression')], ) # Rescale the predictions to be on the original time scale inverse_power_scale = power_scaler( x=regressor_power_statistics, computation_mode=ComputationMode.Transform, use_inverse_transform=True, callbacks=[LinePlotCallback('rescale')]) # Calculate the root mean squared error (RMSE) between the linear regression and the true values # save it as csv file rmse = RmseCalculator()(y_hat=inverse_power_scale, y=pipeline["load_power_statistics"], callbacks=[CSVCallback('RMSE')]) # Now, the pipeline is complete so we can run it and explore the results # Start the pipeline data = pd.read_csv("../data/getting_started_data.csv", index_col="time", parse_dates=["time"], infer_datetime_format=True, sep=",") train = data.iloc[:6000, :] pipeline.train(data=train) test = data.iloc[6000:, :] data = pipeline.test(data=test)
# Second lagged copy (lag 2) of the scaled series.
lag_two_shift = ClockShift(lag=2, name="ClockShift_Lag2")
shift_power_statistics2 = lag_two_shift(x=scale_power_statistics)

# Wrap the Keras model; both lagged series are its inputs and the scaled
# series is its target.
keras_module = KerasWrapper(
    keras_model,
    fit_kwargs={"batch_size": 8, "epochs": 1},
    compile_kwargs={"loss": "mse", "optimizer": "Adam", "metrics": ["mse"]},
)
keras_wrapper = keras_module(
    ClockShift_Lag1=shift_power_statistics,
    ClockShift_Lag2=shift_power_statistics2,
    target=scale_power_statistics,
)

# Invert the scaling on the model output and plot the result.
inverse_power_scale_dl = power_scaler(
    x=keras_wrapper,
    computation_mode=ComputationMode.Transform,
    use_inverse_transform=True,
    callbacks=[LinePlotCallback("prediction")],
)

# RMSE of the rescaled prediction against the true values, written via the
# CSV callback.
rmse_module = RmseCalculator()
rmse_dl = rmse_module(
    keras_model=inverse_power_scale_dl,
    y=pipeline["load_power_statistics"],
    callbacks=[CSVCallback('RMSE')],
)

# The pipeline is complete at this point: load the data, train on all of it
# and store the pipeline in a folder.
# NOTE(review): ``infer_datetime_format`` is deprecated in recent pandas
# releases - confirm against the pandas version this project pins.
data = pd.read_csv(
    "../data/getting_started_data.csv",
    index_col="time",
    parse_dates=["time"],
    infer_datetime_format=True,
    sep=",",
)

pipeline.train(data)
pipeline.to_folder("../results/pipe_keras")