Example #1
0
    def test_batch_1_transform(self, concat_mock, fm_mock):
        """A pipeline with a 1h batch should transform the input batch-wise.

        Seven hourly values are fed in; the mocked step reports seven further
        elements, so get_result runs 7 times, further_elements 8 times, and
        the per-batch results are concatenated 6 times.
        """
        index = pd.date_range('2000-01-01', freq='1H', periods=7)
        series = xr.DataArray(
            [2, 3, 4, 3, 3, 1, 2],
            dims=["time"],
            coords={'time': index},
        )

        # One mocked step that always yields the same result dict and signals
        # seven available elements before it is exhausted.
        step_mock = MagicMock()
        step_mock.get_result.return_value = {"step": series}
        step_mock.name = "step"
        step_mock.further_elements.side_effect = [True] * 7 + [False]

        concat_result = MagicMock()
        concat_mock.return_value = concat_result

        batch_pipeline = Pipeline(batch=pd.Timedelta("1h"))
        batch_pipeline.start_steps["foo"] = StartStep("foo"), None
        batch_pipeline.start_steps["foo"][0].last = False
        batch_pipeline.add(module=step_mock, input_ids=[1])

        result = batch_pipeline.transform(foo=series)

        self.assertEqual(concat_mock.call_count, 6)
        self.assertEqual(step_mock.get_result.call_count, 7)
        self.assertEqual(step_mock.further_elements.call_count, 8)
        self.assertEqual({"step": concat_result}, result)
if __name__ == "__main__":
    # Read the data via pandas.
    # NOTE(review): `infer_datetime_format` is deprecated and a no-op since
    # pandas 2.0 — confirm the pandas version this tutorial targets.
    data = pd.read_csv("../data/getting_started_data.csv", parse_dates=["time"], infer_datetime_format=True,
                       index_col="time")

    # Split the data into train and test data.
    # NOTE(review): rows 6000..8699 belong to neither split — presumably a
    # deliberate gap between train and test; verify against the tutorial text.
    train = data[:6000]
    test = data[8700:]

    # Create all modules which are used multiple times.
    # Both regressors deliberately share the name "Regression"; the scaler
    # gets its own descriptive name.
    regressor_lin_reg = SKLearnWrapper(module=LinearRegression(fit_intercept=True), name="Regression")
    regressor_svr = SKLearnWrapper(module=SVR(), name="Regression")
    power_scaler = SKLearnWrapper(module=StandardScaler(), name="scaler_power")

    # Build a train pipeline. In this pipeline, each step processes all data at once.
    train_pipeline = Pipeline(path="../results/train")

    # Create preprocessing pipeline for the preprocessing steps and wire it
    # to the raw load series of the train pipeline.
    preprocessing_pipeline = create_preprocessing_pipeline(power_scaler)
    preprocessing_pipeline = preprocessing_pipeline(scaler_power=train_pipeline["load_power_statistics"])

    # Add the regressors to the train pipeline. Each consumes the two shifted
    # series produced by the preprocessing sub-pipeline and plots its output.
    regressor_lin_reg(ClockShift=preprocessing_pipeline["ClockShift"],
                      ClockShift_1=preprocessing_pipeline["ClockShift_1"],
                      target=train_pipeline["load_power_statistics"],
                      callbacks=[LinePlotCallback('LinearRegression')])
    regressor_svr(ClockShift=preprocessing_pipeline["ClockShift"],
                  ClockShift_1=preprocessing_pipeline["ClockShift_1"],
                  target=train_pipeline["load_power_statistics"],
                  callbacks=[LinePlotCallback('SVR')])
Example #3
0
 def test_get_params(self, fm_mock):
     """get_params must expose the batch size the Pipeline was built with."""
     pipeline = Pipeline(batch=pd.Timedelta("1h"))
     params = pipeline.get_params()
     self.assertEqual(params, {"batch": pd.Timedelta("1h")})
Example #4
0
 def setUp(self, fm_mock) -> None:
     """Build a fresh Pipeline per test and keep a handle on the mocked filemanager."""
     # The patched filemanager factory is invoked before Pipeline() is built;
     # presumably Pipeline uses the same patched factory internally — confirm
     # against pywatts.core.pipeline before reordering these lines.
     self.fm_mock = fm_mock()
     self.pipeline = Pipeline()
Example #5
0
import pandas as pd
from sklearn.linear_model import LinearRegression
from sklearn.preprocessing import StandardScaler

# From pyWATTS the pipeline is imported
from pywatts.callbacks import LinePlotCallback
from pywatts.core.computation_mode import ComputationMode
from pywatts.core.pipeline import Pipeline
# All modules required for the pipeline are imported
from pywatts.modules import CalendarExtraction, CalendarFeature, ClockShift, LinearInterpolater, SKLearnWrapper
from pywatts.summaries import RMSE

# The main function is where the pipeline is created and run
if __name__ == "__main__":
    # Create a pipeline that writes its results next to the project root.
    pipeline = Pipeline(path="../results")

    # Extract calendar features (month, weekday, weekend flag) using holidays
    # from Germany.
    # NOTE(review): the original note claimed CalendarExtraction "can't return
    # multiple features", yet three features are requested below — the note
    # looks outdated; confirm against the CalendarExtraction documentation.
    calendar = CalendarExtraction(continent="Europe",
                                  country="Germany",
                                  features=[
                                      CalendarFeature.month,
                                      CalendarFeature.weekday,
                                      CalendarFeature.weekend
                                  ])(x=pipeline["load_power_statistics"])

    # Deal with missing values through linear interpolation
    imputer_power_statistics = LinearInterpolater(
        method="nearest", dim="time",
        name="imputer_power")(x=pipeline["load_power_statistics"])
Example #6
0
# Other modules required for the pipeline are imported
import pandas as pd
from sklearn.preprocessing import StandardScaler
from statsmodels.tsa.arima_model import ARIMA

from pywatts.callbacks import CSVCallback, LinePlotCallback
from pywatts.core.computation_mode import ComputationMode
from pywatts.core.pipeline import Pipeline

# All modules required for the pipeline are imported
from pywatts.modules import CalendarExtraction, CalendarFeature, ClockShift, LinearInterpolater, RmseCalculator, \
    SKLearnWrapper, SmTimeSeriesModelWrapper

if __name__ == "__main__":
    # Create a pipeline whose results go to a statsmodels-specific folder.
    pipeline = Pipeline(path="../results/statsmodel")

    # Extract calendar features (hour, weekday, month), using holidays from
    # Germany.
    cal_features = CalendarExtraction(features=[CalendarFeature.hour, CalendarFeature.weekday, CalendarFeature.month],
                                      continent="Europe", country="Germany"
                                      )(x=pipeline["load_power_statistics"])

    # Deal with missing values through linear interpolation
    imputer_power_statistics = LinearInterpolater(
        method="nearest", dim="time", name="imputer_power"
    )(x=pipeline["load_power_statistics"])

    # Scale the data using a standard SKLearn scaler; the scaled series feeds
    # the downstream models.
    power_scaler = SKLearnWrapper(module=StandardScaler(), name="scaler_power")
    scale_power_statistics = power_scaler(x=imputer_power_statistics)
Example #7
0
from pywatts.modules.trend_extraction import TrendExtraction
from pywatts.utils._xarray_time_series_utils import numpy_to_xarray
from pywatts.wrapper.function_module import FunctionModule

# NOTE: For a horizon greater than 24 the profile has to be shifted,
# otherwise future values could leak into the profile computation.
HORIZON = 24


def get_diff(x, profile):
    """Return ``x - profile`` as an xarray series named "difference"."""
    residual = x.values - profile.values
    return numpy_to_xarray(residual, x, "difference")


# NOTE(review): spelling — should be "drift_occurred"; name kept as-is since
# code outside this view may reference it.
drift_occured = False

if __name__ == "__main__":
    pipeline = Pipeline("pnn_pipeline")

    profile_moving = RollingMean(
        window_size=28,
        group_by=RollingGroupBy.WorkdayWeekend)(x=(pipeline["BldgX"]))
    difference = FunctionModule(get_diff)(x=pipeline["BldgX"],
                                          profile=profile_moving)
    trend = TrendExtraction(168, 5)(x=difference)
    calendar = CalendarExtraction(
        country="BadenWurttemberg",
        features=[
            CalendarFeature.hour_sine, CalendarFeature.month_sine,
            CalendarFeature.day_sine, CalendarFeature.monday,
            CalendarFeature.tuesday, CalendarFeature.wednesday,
            CalendarFeature.thursday, CalendarFeature.friday,
            CalendarFeature.hour_cos, CalendarFeature.day_cos,