def create_test_pipeline(modules):
    """Assemble the test pipeline that picks a regressor depending on the time of day.

    :param modules: Two already created regressor modules. They are unpacked in
        the order ``(regressor_svr, regressor_lin_reg)``.
    :return: The assembled test pipeline.
    """
    svr_module, lin_reg_module = modules

    # The test pipeline works on batches of one hour.
    pipeline = Pipeline("../results/test_pipeline", batch=pd.Timedelta("1h"))

    # SVR branch: its condition is true whenever is_daytime(x, y) is false.
    svr_prediction = svr_module(
        ClockShift=pipeline["ClockShift"],
        ClockShift_1=pipeline["ClockShift_1"],
        condition=lambda x, y: not is_daytime(x, y),
        computation_mode=ComputationMode.Transform,
        callbacks=[LinePlotCallback('SVR')],
    )

    # Linear regression branch: its condition is true whenever is_daytime(x, y) is true.
    lin_reg_prediction = lin_reg_module(
        ClockShift=pipeline["ClockShift"],
        ClockShift_1=pipeline["ClockShift_1"],
        condition=lambda x, y: is_daytime(x, y),
        computation_mode=ComputationMode.Transform,
        callbacks=[LinePlotCallback('LinearRegression')],
    )

    # Root mean squared error (RMSE) of the predictions of both branches against
    # the true values; the result is plotted and saved as csv file.
    rmse_module = RmseCalculator()
    rmse_module(
        y_hat=(svr_prediction, lin_reg_prediction),
        y=pipeline["load_power_statistics"],
        callbacks=[LinePlotCallback('RMSE'), CSVCallback('RMSE')],
    )

    return pipeline
def pipe(params):
    """Build, train and test a pipeline around the Keras model created from ``params``.

    :param params: Passed unchanged to ``get_keras_model``.
    :return: Dictionary with the keys ``loss`` (RMSE of the test run as float),
        ``status`` and ``eval_time``.
    """
    model = get_keras_model(params)

    pipeline = Pipeline(path="../results")

    # Impute the raw load series (method 'nearest' along the 'time' dimension).
    imputer = LinearInterpolater(method='nearest', dim='time', name='imputer_power')
    imputed_power = imputer(x=pipeline['load_power_statistics'])

    # The scaler is kept in a variable because it is reused below for the inverse transform.
    power_scaler = SKLearnWrapper(module=StandardScaler(), name='scaler_power')
    scaled_power = power_scaler(x=imputed_power)

    # Lagged versions (lag 1 and lag 2) of the scaled series are the model inputs.
    lag_1 = ClockShift(lag=1, name='ClockShift_Lag1')(x=scaled_power)
    lag_2 = ClockShift(lag=2, name='ClockShift_Lag2')(x=scaled_power)

    keras_module = KerasWrapper(
        model,
        fit_kwargs={'batch_size': 32, 'epochs': 100, 'verbose': 0},
        compile_kwargs={'loss': 'mse', 'optimizer': 'Adam', 'metrics': ['mse']},
    )
    prediction = keras_module(
        ClockShift_Lag1=lag_1,
        ClockShift_Lag2=lag_2,
        target=scaled_power,
    )

    # Apply the inverse transform of the scaler to the predictions.
    rescaled_prediction = power_scaler(
        x=prediction,
        computation_mode=ComputationMode.Transform,
        use_inverse_transform=True,
        callbacks=[LinePlotCallback('prediction')],
    )

    # The returned step is not needed here; the value is read from the test
    # result under the key 'RmseCalculator' below.
    RmseCalculator()(
        keras_model=rescaled_prediction,
        y=pipeline['load_power_statistics'],
        callbacks=[CSVCallback('RMSE')],
    )

    # `train` and `test` are not defined in this function; they have to be
    # provided by the enclosing module.
    pipeline.train(train)
    result = pipeline.test(test)

    # NOTE(review): `start` is not defined in this function either. Presumably it
    # is a module-level timestamp; if so, eval_time is measured from that point
    # and not from the beginning of this call -- confirm that this is intended.
    return {
        "loss": float(result['RmseCalculator'].values),
        "status": STATUS_OK,
        "eval_time": time.time() - start
    }
def create_test_pipeline(modules):
    """Assemble the test pipeline that picks a regressor depending on the time of day.

    :param modules: Two already created regressor modules. They are unpacked in
        the order ``(regressor_svr, regressor_lin_reg)``.
    :return: The assembled test pipeline.
    """
    svr_module, lin_reg_module = modules

    # The test pipeline works on batches of one hour.
    pipeline = Pipeline("../results/test_pipeline", batch=pd.Timedelta("1h"))

    # SVR branch: its condition is true whenever is_daytime(x, y) is false.
    svr_prediction = svr_module(
        ClockShift=pipeline["ClockShift"],
        ClockShift_1=pipeline["ClockShift_1"],
        condition=lambda x, y: not is_daytime(x, y),
        computation_mode=ComputationMode.Transform,
        callbacks=[LinePlotCallback('SVR')],
    )

    # Linear regression branch: its condition is true whenever is_daytime(x, y) is true.
    lin_reg_prediction = lin_reg_module(
        ClockShift=pipeline["ClockShift"],
        ClockShift_1=pipeline["ClockShift_1"],
        condition=lambda x, y: is_daytime(x, y),
        computation_mode=ComputationMode.Transform,
        callbacks=[LinePlotCallback('LinearRegression')],
    )

    # TODO: What kind of RMSE has to be used here?
    #   * A rolling RMSE would not work, since the complete RMSE should be
    #     calculated for each point in time.
    #   * A summary does not work, since summaries are only executed once.
    #   Is the current solution useful?
    #   Possible solution: window_size=-1 means that the window spans from the
    #   start until the current point in time. In that case, the online learning
    #   has to be built in such a way that modules only calculate data for the
    #   desired/requested time steps.

    # Rolling root mean squared error (window of one day) of the predictions of
    # both branches against the true values; the result is plotted and saved as
    # csv file.
    rolling_rmse = RollingRMSE(window_size=1, window_size_unit="d")
    rolling_rmse(
        y_hat=(svr_prediction, lin_reg_prediction),
        y=pipeline["load_power_statistics"],
        callbacks=[LinePlotCallback('RMSE'), CSVCallback('RMSE')],
    )

    return pipeline
# Train the train pipeline on the given data.
print("Start training")
train_pipeline.train(data)
print("Training finished")

# Create a second pipeline. Necessary, since this pipeline has additional steps in contrast to the train pipeline.
pipeline = Pipeline(path="../results")

# Get preprocessing pipeline and feed it with the load data of the new pipeline
preprocessing_pipeline = create_preprocessing_pipeline(power_scaler)
preprocessing_pipeline = preprocessing_pipeline(scaler_power=pipeline["load_power_statistics"])

# Get the test pipeline, the arguments are the modules, from the training pipeline, which should be reused.
# create_test_pipeline unpacks its argument as (regressor_svr, regressor_lin_reg), so the SVR has to be
# passed first. Otherwise the two regressors are swapped: the linear regression would run under the
# "not daytime" condition (and be plotted as 'SVR') and the SVR under the daytime condition.
test_pipeline = create_test_pipeline([regressor_svr, regressor_lin_reg])
test_pipeline(
    ClockShift=preprocessing_pipeline["ClockShift"],
    ClockShift_1=preprocessing_pipeline["ClockShift_1"],
    load_power_statistics=pipeline["load_power_statistics"],
    callbacks=[LinePlotCallback('Pipeline'), CSVCallback('Pipeline')],
)

# Now, the pipeline is complete so we can run it and explore the results
# Start the pipeline
print("Start testing")
result = pipeline.test(test)

# Store the pipeline in a folder, load it again from there and run the test a second time
# with the reloaded pipeline.
pipeline.to_folder("stored_day_and_night")
pipeline = Pipeline.from_folder("stored_day_and_night")
print("Testing finished")
result2 = pipeline.test(test)
print("FINISHED")
target=scale_power_statistics, callbacks=[LinePlotCallback('linear_regression')], ) # Rescale the predictions to be on the original time scale inverse_power_scale = power_scaler( x=regressor_power_statistics, computation_mode=ComputationMode.Transform, use_inverse_transform=True, callbacks=[LinePlotCallback('rescale')]) # Calculate the root mean squared error (RMSE) between the linear regression and the true values # save it as csv file rmse = RmseCalculator()(y_hat=inverse_power_scale, y=pipeline["load_power_statistics"], callbacks=[CSVCallback('RMSE')]) # Now, the pipeline is complete so we can run it and explore the results # Start the pipeline data = pd.read_csv("../data/getting_started_data.csv", index_col="time", parse_dates=["time"], infer_datetime_format=True, sep=",") train = data.iloc[:6000, :] pipeline.train(data=train) test = data.iloc[6000:, :] data = pipeline.test(data=test) # Save the pipeline to a folder
# Create a second pipeline. Necessary, since this pipeline has additional steps in contrast to the train pipeline.
pipeline = Pipeline(path="../results")

# Get preprocessing pipeline and feed it with the load data of the new pipeline
preprocessing_pipeline = create_preprocessing_pipeline(power_scaler)
preprocessing_pipeline = preprocessing_pipeline(
    scaler_power=pipeline["load_power_statistics"])

# Get the test pipeline, the arguments are the modules, from the training pipeline, which should be reused.
# create_test_pipeline unpacks its argument as (regressor_svr, regressor_lin_reg), so the SVR has to be
# passed first. Otherwise the two regressors are swapped: the linear regression would run under the
# "not daytime" condition (and be plotted as 'SVR') and the SVR under the daytime condition.
test_pipeline = create_test_pipeline([regressor_svr, regressor_lin_reg])
test_pipeline(
    ClockShift=preprocessing_pipeline["ClockShift"],
    ClockShift_1=preprocessing_pipeline["ClockShift_1"],
    load_power_statistics=pipeline["load_power_statistics"],
    callbacks=[LinePlotCallback('Pipeline'), CSVCallback('Pipeline')])

# Now, the pipeline is complete so we can run it and explore the results
# Start the pipeline
print("Start testing")
result = pipeline.test(test)

# Store the pipeline in a folder, load it again from there and run the test a second time
# with the reloaded pipeline.
pipeline.to_folder("stored_day_and_night")
pipeline = Pipeline.from_folder("stored_day_and_night")
print("Testing finished")
result2 = pipeline.test(test)
print("FINISHED")
def custom_multiplication(x: xr.Dataset):
    """Return the given dataset multiplied by 1000."""
    return x * 1000


# The main function is where the pipeline is created and run
if __name__ == "__main__":
    # Create a pipeline
    pipeline = Pipeline(path="../results")

    # Wrap the custom function in a FunctionModule ...
    multiplication_module = FunctionModule(custom_multiplication, name="Multiplication")
    # ... and add the module to the pipeline. The result is written to a csv
    # file and drawn as line plot by the two callbacks.
    function_module = multiplication_module(
        x=pipeline["load_power_statistics"],
        callbacks=[CSVCallback("Mul"), LinePlotCallback("Mul")],
    )

    # Now, the pipeline is complete so we can run it and explore the results
    # Start the pipeline
    df = pd.read_csv(
        "../data/getting_started_data.csv",
        parse_dates=["time"],
        infer_datetime_format=True,
        index_col="time",
    )
    pipeline.train(df)

    # Generate a plot of the pipeline showing the flow of data through different modules
    pipeline.draw()
    plt.show()
from pywatts.callbacks import CSVCallback, LinePlotCallback
# All modules required for the pipeline are imported
from pywatts.wrapper import FunctionModule


def custom_multiplication(x: xr.Dataset):
    """Return the given dataset multiplied by 1000."""
    return x * 1000


# The main function is where the pipeline is created and run
if __name__ == "__main__":
    # Create a pipeline
    pipeline = Pipeline(path="../results")

    # Wrap the custom function in a FunctionModule ...
    multiplication_module = FunctionModule(custom_multiplication, name="Multiplication")
    # ... and add the module to the pipeline. The result is written to a csv
    # file and drawn as line plot by the two callbacks.
    function_module = multiplication_module(
        x=pipeline["load_power_statistics"],
        callbacks=[CSVCallback("Mul"), LinePlotCallback("Mul")],
    )

    # Now, the pipeline is complete so we can run it and explore the results
    # Start the pipeline
    df = pd.read_csv(
        "../data/getting_started_data.csv",
        parse_dates=["time"],
        infer_datetime_format=True,
        index_col="time",
    )
    pipeline.train(df)

    # Generate a plot of the pipeline showing the flow of data through different modules
    pipeline.draw()
    plt.show()