def test_batch_1_transform(self, concat_mock, fm_mock):
    """A pipeline with batch=1h should pull results batch-by-batch.

    Seven hourly values are offered; the step reports data available seven
    times and exhaustion on the eighth query, so get_result runs 7 times and
    the partial results are concatenated 6 times (first batch needs no concat).
    """
    index = pd.date_range('2000-01-01', freq='1H', periods=7)
    series = xr.DataArray([2, 3, 4, 3, 3, 1, 2], dims=["time"],
                          coords={'time': index})

    pipeline = Pipeline(batch=pd.Timedelta("1h"))

    step_mock = MagicMock()
    step_mock.get_result.return_value = {"step": series}
    step_mock.name = "step"

    concat_result = MagicMock()
    concat_mock.return_value = concat_result

    pipeline.start_steps["foo"] = StartStep("foo"), None
    pipeline.start_steps["foo"][0].last = False
    # Seven batches are available; the eighth call signals exhaustion.
    step_mock.further_elements.side_effect = [True] * 7 + [False]
    pipeline.add(module=step_mock, input_ids=[1])

    result = pipeline.transform(foo=series)

    self.assertEqual(concat_mock.call_count, 6)
    self.assertEqual(step_mock.get_result.call_count, 7)
    self.assertEqual(step_mock.further_elements.call_count, 8)
    self.assertEqual({"step": concat_result}, result)
if __name__ == "__main__":
    # Read the data via pandas.
    # NOTE(review): infer_datetime_format is deprecated in pandas >= 2.0 and
    # a no-op there — confirm the pinned pandas version before removing it.
    data = pd.read_csv("../data/getting_started_data.csv",
                       parse_dates=["time"],
                       infer_datetime_format=True,
                       index_col="time")

    # Split the data into train and test data.
    # Presumably the gap 6000..8700 is held out on purpose — verify upstream.
    train = data[:6000]
    test = data[8700:]

    # Create all modules which are used multiple times.
    # Both regressors share the name "Regression" — presumably deliberate so
    # they occupy the same result slot; confirm against the pipeline design.
    regressor_lin_reg = SKLearnWrapper(module=LinearRegression(fit_intercept=True),
                                       name="Regression")
    regressor_svr = SKLearnWrapper(module=SVR(), name="Regression")
    power_scaler = SKLearnWrapper(module=StandardScaler(), name="scaler_power")

    # Build a train pipeline. In this pipeline, each step processes all data at once.
    train_pipeline = Pipeline(path="../results/train")

    # Create a preprocessing pipeline for the preprocessing steps and feed it
    # the raw load column from the enclosing train pipeline.
    preprocessing_pipeline = create_preprocessing_pipeline(power_scaler)
    preprocessing_pipeline = preprocessing_pipeline(
        scaler_power=train_pipeline["load_power_statistics"])

    # Add the regressors to the train pipeline. Each consumes the two lagged
    # features produced by preprocessing and targets the raw load series.
    regressor_lin_reg(ClockShift=preprocessing_pipeline["ClockShift"],
                      ClockShift_1=preprocessing_pipeline["ClockShift_1"],
                      target=train_pipeline["load_power_statistics"],
                      callbacks=[LinePlotCallback('LinearRegression')])
    regressor_svr(ClockShift=preprocessing_pipeline["ClockShift"],
                  ClockShift_1=preprocessing_pipeline["ClockShift_1"],
                  target=train_pipeline["load_power_statistics"],
                  callbacks=[LinePlotCallback('SVR')])
def test_get_params(self, fm_mock):
    """get_params must report exactly the batch size the pipeline was built with."""
    batch = pd.Timedelta("1h")
    params = Pipeline(batch=batch).get_params()
    self.assertEqual(params, {"batch": batch})
def setUp(self, fm_mock) -> None:
    """Build a fresh Pipeline and keep a handle on the patched file manager."""
    self.pipeline = Pipeline()
    self.fm_mock = fm_mock()
import pandas as pd
from sklearn.linear_model import LinearRegression
from sklearn.preprocessing import StandardScaler

# From pyWATTS the pipeline is imported
from pywatts.callbacks import LinePlotCallback
from pywatts.core.computation_mode import ComputationMode
from pywatts.core.pipeline import Pipeline
# All modules required for the pipeline are imported
from pywatts.modules import CalendarExtraction, CalendarFeature, ClockShift, LinearInterpolater, SKLearnWrapper
from pywatts.summaries import RMSE

# The main function is where the pipeline is created and run
if __name__ == "__main__":
    # Create a pipeline; results are written below ../results.
    pipeline = Pipeline(path="../results")

    # Extract dummy calendar features, using holidays from Germany.
    # NOTE: CalendarExtraction can't return multiple features.
    calendar = CalendarExtraction(continent="Europe",
                                  country="Germany",
                                  features=[CalendarFeature.month,
                                            CalendarFeature.weekday,
                                            CalendarFeature.weekend
                                            ])(x=pipeline["load_power_statistics"])

    # Deal with missing values through linear interpolation of the load column.
    imputer_power_statistics = LinearInterpolater(
        method="nearest", dim="time",
        name="imputer_power")(x=pipeline["load_power_statistics"])
# Other modules required for the pipeline are imported import pandas as pd from sklearn.preprocessing import StandardScaler from statsmodels.tsa.arima_model import ARIMA from pywatts.callbacks import CSVCallback, LinePlotCallback from pywatts.core.computation_mode import ComputationMode from pywatts.core.pipeline import Pipeline # All modules required for the pipeline are imported from pywatts.modules import CalendarExtraction, CalendarFeature, ClockShift, LinearInterpolater, RmseCalculator, \ SKLearnWrapper, SmTimeSeriesModelWrapper if __name__ == "__main__": # Create a pipeline pipeline = Pipeline(path="../results/statsmodel") # Extract dummy calender features, using holidays from Germany cal_features = CalendarExtraction(features=[CalendarFeature.hour, CalendarFeature.weekday, CalendarFeature.month], continent="Europe", country="Germany" )(x=pipeline["load_power_statistics"]) # Deal with missing values through linear interpolation imputer_power_statistics = LinearInterpolater( method="nearest", dim="time", name="imputer_power" )(x=pipeline["load_power_statistics"]) # Scale the data using a standard SKLearn scaler power_scaler = SKLearnWrapper(module=StandardScaler(), name="scaler_power") scale_power_statistics = power_scaler(x=imputer_power_statistics)
from pywatts.modules.trend_extraction import TrendExtraction
from pywatts.utils._xarray_time_series_utils import numpy_to_xarray
from pywatts.wrapper.function_module import FunctionModule

# NOTE If you choose a horizon greater than 24 you have to shift the profile
# -> Else future values may be considered for calculating the profile.
HORIZON = 24


def get_diff(x, profile):
    """Return element-wise ``x - profile`` as an xarray named "difference".

    Both arguments are assumed to be xarray DataArrays aligned on the same
    time index — TODO confirm against the pipeline wiring below.
    """
    return numpy_to_xarray(x.values - profile.values, x, "difference")


# Module-level flag; presumably toggled later when a concept drift is
# detected — verify against the rest of this script.
drift_occured = False

if __name__ == "__main__":
    pipeline = Pipeline("pnn_pipeline")

    # 28-day rolling mean profile, grouped workday vs. weekend.
    profile_moving = RollingMean(
        window_size=28, group_by=RollingGroupBy.WorkdayWeekend)(x=(pipeline["BldgX"]))
    # Residual of the raw signal against its moving profile.
    difference = FunctionModule(get_diff)(x=pipeline["BldgX"], profile=profile_moving)
    # Weekly (168h) trend over 5 periods of the residual.
    trend = TrendExtraction(168, 5)(x=difference)
    calendar = CalendarExtraction(
        country="BadenWurttemberg",
        features=[
            CalendarFeature.hour_sine, CalendarFeature.month_sine,
            CalendarFeature.day_sine, CalendarFeature.monday,
            CalendarFeature.tuesday, CalendarFeature.wednesday,
            CalendarFeature.thursday, CalendarFeature.friday,
            CalendarFeature.hour_cos, CalendarFeature.day_cos,