def create_test_pipeline(modules):
    """Assemble the test pipeline that picks a regressor depending on the time of day.

    Both regressors are attached in transform mode to the ``ClockShift`` and
    ``ClockShift_1`` inputs of a new pipeline. The first one is guarded by the
    condition ``not is_daytime(x, y)``, the second one by ``is_daytime(x, y)``.
    An RMSE step compares both predictions with ``load_power_statistics``.

    :param modules: Iterable of exactly two regressor modules, unpacked in the
        order ``(regressor_svr, regressor_lin_reg)``.
    :return: The assembled pipeline.

    NOTE(review): call sites visible in this file pass
    ``[regressor_lin_reg, regressor_svr]``, i.e. the opposite order of the
    unpacking below -- confirm which order is intended.
    """
    svr_module, lin_reg_module = modules

    # The test pipeline is created with a batch size of one hour.
    pipeline = Pipeline("../results/test_pipeline", batch=pd.Timedelta("1h"))

    def _attach(regressor, condition, plot_name):
        # Wire one regressor to both ClockShift inputs, guarded by the given condition.
        return regressor(
            ClockShift=pipeline["ClockShift"],
            ClockShift_1=pipeline["ClockShift_1"],
            condition=condition,
            computation_mode=ComputationMode.Transform,
            callbacks=[LinePlotCallback(plot_name)],
        )

    # SVR branch: its condition holds whenever is_daytime returns a falsy value.
    night_prediction = _attach(svr_module, lambda x, y: not is_daytime(x, y), 'SVR')

    # Linear regression branch: its condition is is_daytime itself.
    day_prediction = _attach(lin_reg_module, lambda x, y: is_daytime(x, y), 'LinearRegression')

    # RMSE between both conditional predictions and the true values,
    # with a line plot callback and a CSV callback attached.
    rmse_module = RmseCalculator()
    rmse_module(
        y_hat=(night_prediction, day_prediction),
        y=pipeline["load_power_statistics"],
        callbacks=[LinePlotCallback('RMSE'), CSVCallback('RMSE')],
    )

    return pipeline
# Example #2
# 0
    def pipe(params):
        """Build, train and test one Keras based pipeline and report its RMSE.

        :param params: Passed unchanged to ``get_keras_model``.
        :return: Dict with the keys ``"loss"`` (RMSE result of the test run as
            float), ``"status"`` (``STATUS_OK``) and ``"eval_time"``
            (``time.time() - start``).

        NOTE(review): ``train``, ``test`` and ``start`` are not defined in this
        function; presumably they come from the enclosing scope -- confirm.
        """
        model = get_keras_model(params)

        pipeline = Pipeline(path="../results")

        # Interpolate the load data along the time dimension (method 'nearest').
        interpolater = LinearInterpolater(method='nearest', dim='time', name='imputer_power')
        imputed_power = interpolater(x=pipeline['load_power_statistics'])

        # The scaler is kept in a variable because it is reused further down
        # for the inverse transformation.
        power_scaler = SKLearnWrapper(module=StandardScaler(), name='scaler_power')
        scaled_power = power_scaler(x=imputed_power)

        # Two lagged versions of the scaled series serve as model inputs.
        lag_one = ClockShift(lag=1, name='ClockShift_Lag1')(x=scaled_power)
        lag_two = ClockShift(lag=2, name='ClockShift_Lag2')(x=scaled_power)

        fit_options = {'batch_size': 32, 'epochs': 100, 'verbose': 0}
        compile_options = {'loss': 'mse', 'optimizer': 'Adam', 'metrics': ['mse']}
        network = KerasWrapper(model, fit_kwargs=fit_options, compile_kwargs=compile_options)
        scaled_prediction = network(
            ClockShift_Lag1=lag_one,
            ClockShift_Lag2=lag_two,
            target=scaled_power,
        )

        # Apply the scaler's inverse transform to the prediction.
        prediction = power_scaler(
            x=scaled_prediction,
            computation_mode=ComputationMode.Transform,
            use_inverse_transform=True,
            callbacks=[LinePlotCallback('prediction')],
        )

        # RMSE step between the rescaled prediction and the raw load data.
        RmseCalculator()(
            keras_model=prediction,
            y=pipeline['load_power_statistics'],
            callbacks=[CSVCallback('RMSE')],
        )

        pipeline.train(train)
        test_result = pipeline.test(test)

        rmse = float(test_result['RmseCalculator'].values)
        return {
            "loss": rmse,
            "status": STATUS_OK,
            "eval_time": time.time() - start,
        }
# Example #3
# 0
def create_test_pipeline(modules):
    """Assemble the test pipeline with two conditional regressors and a rolling RMSE.

    Each regressor is attached in transform mode to the ``ClockShift`` and
    ``ClockShift_1`` inputs of a new pipeline and guarded by a condition built
    on ``is_daytime``. A ``RollingRMSE`` step compares both predictions with
    ``load_power_statistics``.

    :param modules: Iterable of exactly two regressor modules, unpacked in the
        order ``(regressor_svr, regressor_lin_reg)``.
    :return: The assembled pipeline.

    NOTE(review): call sites visible in this file pass
    ``[regressor_lin_reg, regressor_svr]``, i.e. the opposite order of the
    unpacking below -- confirm which order is intended.
    """
    svr_module, lin_reg_module = modules

    # The test pipeline is created with a batch size of one hour.
    pipeline = Pipeline("../results/test_pipeline", batch=pd.Timedelta("1h"))

    # One entry per branch: (regressor, condition, name of the line plot).
    # The SVR condition is the negation of is_daytime, the linear regression
    # condition is is_daytime itself.
    branches = (
        (svr_module, lambda x, y: not is_daytime(x, y), 'SVR'),
        (lin_reg_module, lambda x, y: is_daytime(x, y), 'LinearRegression'),
    )
    predictions = tuple(
        regressor(
            ClockShift=pipeline["ClockShift"],
            ClockShift_1=pipeline["ClockShift_1"],
            condition=condition,
            computation_mode=ComputationMode.Transform,
            callbacks=[LinePlotCallback(plot_name)],
        )
        for regressor, condition, plot_name in branches
    )

    # TODO: which kind of RMSE has to be used here?
    #   * A rolling RMSE would not work, since the complete RMSE should be calculated for each point in time.
    #   * A summary does not work, since summaries are only executed once.
    #   Is the current solution useful?
    #   Possible solution: window_size=-1 means that the window spans from the start until the current point
    #                      in time. In that case, the online learning has to be built in such a way that
    #                      modules only calculate data for the desired/requested time steps.

    # RMSE between both conditional predictions and the true values,
    # with a line plot callback and a CSV callback attached.
    rolling_rmse = RollingRMSE(window_size=1, window_size_unit="d")
    rolling_rmse(
        y_hat=predictions,
        y=pipeline["load_power_statistics"],
        callbacks=[LinePlotCallback('RMSE'), CSVCallback('RMSE')],
    )

    return pipeline
    print("Start training")
    train_pipeline.train(data)
    print("Training finished")

    # Create a second pipeline. Necessary, since this pipeline has additional steps in contrast to the train pipeline.
    pipeline = Pipeline(path="../results")

    # Get preprocessing pipeline
    preprocessing_pipeline = create_preprocessing_pipeline(power_scaler)
    preprocessing_pipeline = preprocessing_pipeline(scaler_power=pipeline["load_power_statistics"])

    # Get the test pipeline, the arguments are the modules, from the training pipeline, which should be reused
    test_pipeline = create_test_pipeline([regressor_lin_reg, regressor_svr])

    test_pipeline(ClockShift=preprocessing_pipeline["ClockShift"],
                  ClockShift_1=preprocessing_pipeline["ClockShift_1"],
                  load_power_statistics=pipeline["load_power_statistics"],
                  callbacks=[LinePlotCallback('Pipeline'), CSVCallback('Pipeline')])

    # Now, the pipeline is complete so we can run it and explore the results
    # Start the pipeline
    print("Start testing")
    result = pipeline.test(test)

    pipeline.to_folder("stored_day_and_night")
    pipeline = Pipeline.from_folder("stored_day_and_night")
    print("Testing finished")
    result2 = pipeline.test(test)

    print("FINISHED")
# Example #5
# 0
            target=scale_power_statistics,
            callbacks=[LinePlotCallback('linear_regression')],
        )

    # Rescale the predictions to be on the original time scale
    inverse_power_scale = power_scaler(
        x=regressor_power_statistics,
        computation_mode=ComputationMode.Transform,
        use_inverse_transform=True,
        callbacks=[LinePlotCallback('rescale')])

    # Calculate the root mean squared error (RMSE) between the linear regression and the true values
    # save it as csv file
    rmse = RmseCalculator()(y_hat=inverse_power_scale,
                            y=pipeline["load_power_statistics"],
                            callbacks=[CSVCallback('RMSE')])

    # Now, the pipeline is complete so we can run it and explore the results
    # Start the pipeline
    data = pd.read_csv("../data/getting_started_data.csv",
                       index_col="time",
                       parse_dates=["time"],
                       infer_datetime_format=True,
                       sep=",")
    train = data.iloc[:6000, :]
    pipeline.train(data=train)

    test = data.iloc[6000:, :]
    data = pipeline.test(data=test)

    # Save the pipeline to a folder
# Example #6
# 0
    # Create a second pipeline. Necessary, since this pipeline has additional steps in contrast to the train pipeline.
    pipeline = Pipeline(path="../results")

    # Get preprocessing pipeline
    preprocessing_pipeline = create_preprocessing_pipeline(power_scaler)
    preprocessing_pipeline = preprocessing_pipeline(
        scaler_power=pipeline["load_power_statistics"])

    # Get the test pipeline, the arguments are the modules, from the training pipeline, which should be reused
    test_pipeline = create_test_pipeline([regressor_lin_reg, regressor_svr])

    test_pipeline(
        ClockShift=preprocessing_pipeline["ClockShift"],
        ClockShift_1=preprocessing_pipeline["ClockShift_1"],
        load_power_statistics=pipeline["load_power_statistics"],
        callbacks=[LinePlotCallback('Pipeline'),
                   CSVCallback('Pipeline')])

    # Now, the pipeline is complete so we can run it and explore the results
    # Start the pipeline
    print("Start testing")
    result = pipeline.test(test)

    pipeline.to_folder("stored_day_and_night")
    pipeline = Pipeline.from_folder("stored_day_and_night")
    print("Testing finished")
    result2 = pipeline.test(test)

    print("FINISHED")
# Example #7
# 0
def custom_multiplication(x: xr.Dataset):
    """Return the given dataset multiplied by 1000."""
    factor = 1000
    return x * factor


# The main function is where the pipeline is created and run
if __name__ == "__main__":
    # Set up the pipeline with "../results" as its path
    pipeline = Pipeline(path="../results")

    # Wrap the custom function in a FunctionModule, then attach the module to
    # the pipeline's "load_power_statistics" input with CSV and line plot callbacks
    multiplication = FunctionModule(custom_multiplication, name="Multiplication")
    function_module = multiplication(
        x=pipeline["load_power_statistics"],
        callbacks=[CSVCallback("Mul"), LinePlotCallback("Mul")],
    )

    # The pipeline is complete now: load the data, using the parsed "time"
    # column as index, and run the pipeline on it
    df = pd.read_csv(
        "../data/getting_started_data.csv",
        parse_dates=["time"],
        infer_datetime_format=True,
        index_col="time",
    )
    pipeline.train(df)

    # Draw the pipeline to show the flow of data through the modules
    pipeline.draw()
    plt.show()
from pywatts.callbacks import CSVCallback, LinePlotCallback

# All modules required for the pipeline are imported
from pywatts.wrapper import FunctionModule


def custom_multiplication(x: xr.Dataset):
    """Multiply the given dataset by 1000 and return the result."""
    scaled = x * 1000
    return scaled


# The main function is where the pipeline is created and run
if __name__ == "__main__":
    # Create the pipeline; "../results" is passed as its path
    pipeline = Pipeline(path="../results")

    # Turn custom_multiplication into a module and connect it to the
    # "load_power_statistics" input, with a CSV and a line plot callback
    multiplication_module = FunctionModule(custom_multiplication, name="Multiplication")
    function_module = multiplication_module(
        x=pipeline["load_power_statistics"],
        callbacks=[CSVCallback("Mul"), LinePlotCallback("Mul")],
    )

    # With the pipeline complete, read the data (the "time" column is parsed
    # as dates and used as index) and start the pipeline
    csv_options = {
        "parse_dates": ["time"],
        "infer_datetime_format": True,
        "index_col": "time",
    }
    df = pd.read_csv("../data/getting_started_data.csv", **csv_options)

    pipeline.train(df)

    # Plot the pipeline to show how data flows through the different modules
    pipeline.draw()
    plt.show()