def features1():
    """Fixture: two shift features and a 3-step moving average over numeric columns."""
    def numeric_selector():
        # Fresh selector per transformer, mirroring the sibling fixtures.
        return make_column_selector(dtype_include=np.number)

    return [
        ("shift_0", Shift(0), numeric_selector()),
        ("shift_1", Shift(1), numeric_selector()),
        ("moving_average_3", MovingAverage(window_size=3), numeric_selector()),
    ]
def test_multi_columns_time_shift_feature(self):
    """Shifting by -2 moves every column's values two steps earlier, NaN-padding the tail."""
    transformer = Shift(shift=-2)
    source = {"x0": [0, 1, 2, 3, 4, 5], "x1": [7, 8, 9, 10, 11, 12]}
    # Expected: drop the first two values, append two NaNs, rename per Shift's convention.
    shifted = {
        f"{name}__{shift_class_name}": values[2:] + [np.nan, np.nan]
        for name, values in source.items()
    }
    result = transformer.fit_transform(pd.DataFrame(source))
    testing.assert_frame_equal(result, pd.DataFrame.from_dict(shifted))
def horizon_shift(
    time_series: pd.DataFrame, horizon: Union[int, List[int]] = 5
) -> pd.DataFrame:
    """Shift ``time_series`` to build the prediction target matrix ``y``.

    If ``horizon`` is a number, one shifted copy is produced for every time
    step between 1 and ``horizon``; if it is a list, only the listed time
    steps are produced.

    Parameters
    ----------
    time_series : pd.DataFrame, shape (n_samples, n_features), required
        The time series to shift in order to create the target matrix.

    horizon : int or list of int, optional, default: ``5``
        It represents how much into the future is necessary to predict. This
        corresponds to the number of shifts that are going to be performed
        on y.

    Returns
    -------
    y : pd.DataFrame, shape (n_samples, horizon)
        The shifted time series.

    Examples
    --------
    >>> import pandas as pd
    >>> from gtime.model_selection import horizon_shift
    >>> X = pd.DataFrame(range(0, 5), index=pd.date_range("2020-01-01", "2020-01-05"))
    >>> horizon_shift(X, horizon=2)
                y_1  y_2
    2020-01-01  1.0  2.0
    2020-01-02  2.0  3.0
    2020-01-03  3.0  4.0
    2020-01-04  4.0  NaN
    2020-01-05  NaN  NaN
    >>> horizon_shift(X, horizon=[2])
                y_2
    2020-01-01  2.0
    2020-01-02  3.0
    2020-01-03  4.0
    2020-01-04  NaN
    2020-01-05  NaN

    """
    if isinstance(horizon, (int, float)):
        # Bug fix: range() rejects float bounds, so a float horizon used to
        # raise TypeError even though the isinstance check accepted it.
        horizon = range(1, int(horizon) + 1)
    y = pd.DataFrame(index=time_series.index)
    # Sort so the y_k columns always appear in increasing order of k.
    for k in sorted(horizon):
        shift_feature = Shift(-k)
        y[f"y_{k}"] = shift_feature.fit_transform(time_series)
    return y
def __init__(self, p: int, horizon: int):
    """Auto-regressive forecaster: use the previous ``p`` values as features.

    Parameters
    ----------
    p : int, required
        Number of autoregressive lags; builds the shifts ``1, ..., p``.

    horizon : int, required
        How far into the future to predict.
    """
    # Idiom fix: a tuple literal is already a tuple — no tuple() wrapper needed.
    features = [
        (f"s{i}", Shift(i), make_column_selector(dtype_include=np.number))
        for i in range(1, p + 1)
    ]
    model = GAR(LinearRegression())
    super().__init__(features=features, horizon=horizon, model=model)
def __init__(self, horizon: int, seasonal_length: int):
    """Seasonal-naive wrapper: forward the raw series (shift 0) to the forecaster."""
    identity_feature = ("s1", Shift(0), make_column_selector())
    super().__init__(
        features=[identity_feature],
        horizon=horizon,
        model=SeasonalNaiveForecaster(seasonal_length),
    )
def __init__(
    self,
    p: int,
    horizon: Union[int, List[int]],
    explainer_type: Optional[str] = None,
):
    """Explainable auto-regressive forecaster built on a linear GAR.

    Parameters
    ----------
    p : int, required
        Number of lag features; builds the shifts ``0, ..., p - 1``.
        NOTE(review): the plain AR variant builds shifts ``1, ..., p`` —
        confirm the off-by-one difference here is intentional.

    horizon : int or list of int, required
        How far into the future to predict.

    explainer_type : str, optional, default: ``None``
        Forwarded to ``GAR`` to select the prediction explainer.
    """
    self.p = p
    self.explainer_type = explainer_type
    # Idiom fix: a tuple literal is already a tuple — no tuple() wrapper needed.
    features = [
        (f"s{i}", Shift(i), make_column_selector(dtype_include=np.number))
        for i in range(p)
    ]
    model = GAR(LinearRegression(), explainer_type=explainer_type)
    super().__init__(features=features, horizon=horizon, model=model)
def test_feature_creation_transform():
    """FeatureCreation names output columns as '<id>__<column>__<ClassName>'."""
    data = testing.makeTimeDataFrame(freq="s")
    shift = Shift(1)
    ma = MovingAverage(window_size=3)
    col_name = 'A'
    transformers = [('s1', shift, [col_name]), ('ma3', ma, [col_name])]
    fc = FeatureCreation(transformers)
    res = fc.fit(data).transform(data)
    expected_columns = [
        f's1__{col_name}__{shift.__class__.__name__}',
        f'ma3__{col_name}__{ma.__class__.__name__}',
    ]
    assert_array_equal(res.columns.values, expected_columns)
from gtime.utils.hypothesis.feature_matrices import (
    X_y_matrices,
    X_matrices,
    y_matrices,
    numpy_X_y_matrices,
    numpy_X_matrices,
)
from gtime.utils.hypothesis.general_strategies import (
    shape_X_y_matrices,
    ordered_pair,
    shape_matrix,
)


def _numeric_selector():
    # One selector instance per transformer entry.
    return make_column_selector(dtype_include=np.number)


# Shared transformer for the strategies below: two shifts plus a
# 3-step moving average, all restricted to numeric columns.
df_transformer = FeatureCreation(
    [
        ("shift_0", Shift(0), _numeric_selector()),
        ("shift_1", Shift(1), _numeric_selector()),
        ("moving_average_3", MovingAverage(window_size=3), _numeric_selector()),
    ]
)


class TestXyMatrices:
    @given(X_y_matrices(horizon=3, df_transformer=df_transformer))
    def test_X_shape_correct(self, X_y: Tuple[pd.DataFrame, pd.DataFrame]):
        """X must have one column per fitted transformer."""
        X, y = X_y
        assert X.shape[1] == len(df_transformer.transformers_)
def features2():
    """Fixture: shift-by-0 and shift-by-1 features over numeric columns."""
    feature_defs = []
    for lag in (0, 1):
        selector = make_column_selector(dtype_include=np.number)
        feature_defs.append((f"shift_{lag}", Shift(lag), selector))
    return feature_defs
def __init__(self, horizon: int):
    """Average-forecaster wrapper: feed the raw series (shift 0) to the model."""
    super().__init__(
        features=[("s1", Shift(0), make_column_selector())],
        horizon=horizon,
        model=AverageForecaster(),
    )
def test_random_ts_and_shifts(self, df: pd.DataFrame, shift: int):
    """Shift output must match pandas' own ``df.shift`` on random frames/offsets."""
    shift_feature = Shift(shift=shift)
    df_shifted = shift_feature.fit_transform(df)
    correct_df_shifted = self._correct_shift(df, shift)
    # Bug fix: the expected frame was computed but never compared, so the
    # test could not fail. Shift renames columns to '<col>__<ClassName>',
    # so align the reference frame's columns before comparing.
    correct_df_shifted.columns = [
        f"{col}__{shift_class_name}" for col in df.columns
    ]
    testing.assert_frame_equal(df_shifted, correct_df_shifted)
def test_shift_transform(self, shift, expected):
    """Parametrized check: Shift(s).fit_transform(df) equals the precomputed frame."""
    # Named 'transformer' to avoid shadowing the 'shift' parameter.
    transformer = Shift(shift=shift)
    actual = transformer.fit_transform(df)
    testing.assert_frame_equal(actual, expected)
from hypothesis import given, strategies as st
from hypothesis.extra.pandas import column, data_frames
from gtime.feature_extraction import (
    Shift,
    MovingAverage,
    Exogenous,
    Polynomial,
    CustomFeature,
    MovingCustomFunction,
)
from gtime.utils.hypothesis.time_indexes import giotto_time_series

# Shared fixtures: a small integer series and its expected outputs for a few shifts.
df = pd.DataFrame.from_dict({"x": [0, 1, 2, 3, 4, 5]})
# Shift renames output columns to '<col>__<ClassName>'.
shift_class_name = Shift().__class__.__name__
# Positive shift: values move down, head padded with NaN.
df_shift_1 = pd.DataFrame.from_dict({f"x__{shift_class_name}": [np.nan, 0, 1, 2, 3, 4]})
# Negative shift: values move up, tail padded with NaN.
df_shift_m2 = pd.DataFrame.from_dict(
    {f"x__{shift_class_name}": [2, 3, 4, 5, np.nan, np.nan]}
)
# Zero shift: values unchanged, only the column is renamed.
df_shift_0 = pd.DataFrame.from_dict({f"x__{shift_class_name}": [0, 1, 2, 3, 4, 5]})


# FIXME: shift a + shift b = shift a+b instead


class TestShift:
    def _correct_shift(self, df: pd.DataFrame, shift: int) -> pd.DataFrame:
        # Reference implementation: pandas' own shift (keeps original column names).
        return df.shift(shift)

    # NOTE(review): the method this decorator targets is not visible in this
    # chunk; it parametrizes a test over positive, negative and zero shifts.
    @pytest.mark.parametrize(
        ("shift", "expected"), [(1, df_shift_1), (-2, df_shift_m2), (0, df_shift_0)]
    )
time_series = TimeSeries() # You can plot time_series.plot() # Decomposition ## Un peu bizarre le plot_stl() et deux fois stl_decomposition time_series = time_series.stl_decomposition() time_series.plot_stl() time_series = time_series.recompose() # Choose a good name # Box-Cox time_series = time_series.box_cox(lambda_=0.3) # Feature forecasting features = [("shift", Shift(1), "time_series")] automatic_features = get_features() # Similar to fast.ai get_transforms() gar_forecaster = LinearRegression() # This object TimeSeriesForecastingModel keeps into account all the intermediate steps. # You don't need to manually deal with train/test split, etc.. forecasting_model = TimeSeriesForecastingModel( features=features, horizon=3, model=gar_forecaster ) forecasting_model = forecasting_model.fit(time_series) forecasting_model.predict() forecasting_model.cross_validate() # Is cross validation also on multiple time series? # Residuals analysis forecasting_model.residuals_.acf() # Questions
class TestCVPipeline:
    """End-to-end tests for CVPipeline: construction, fit/predict, and refit modes."""

    @given(
        models=models_grid(),
        n_splits=st.integers(min_value=2, max_value=10),
        blocking=st.booleans(),
        metrics=metrics(),
    )
    def test_constructor(self, models, n_splits, blocking, metrics):
        # The model list must contain one entry per point of the hyperparameter grid:
        # the product of the per-model parameter-list lengths, summed over models.
        cv_pipeline = CVPipeline(
            models_sets=models, n_splits=n_splits, blocking=blocking, metrics=metrics
        )
        list_len = np.sum(
            [np.prod([len(y) for y in x.values()]) for x in models.values()]
        )
        assert list_len == len(cv_pipeline.model_list)
        assert len(metrics) == len(cv_pipeline.metrics)

    @pytest.mark.parametrize(
        "models", [{Naive: {"horizon": [3]}, AR: {"horizon": [3], "p": [2, 3]}}]
    )
    @pytest.mark.parametrize("metrics", [{"RMSE": rmse, "MAE": mae}])
    @pytest.mark.parametrize("n_splits", [3, 5])
    @pytest.mark.parametrize("blocking", [True, False])
    @pytest.mark.parametrize("seed", [5, 1000])
    def test_fit_predict(self, models, n_splits, blocking, metrics, seed):
        # Fit on a year of random daily data and check the result-table shape.
        cv_pipeline = CVPipeline(
            models_sets=models, n_splits=n_splits, blocking=blocking, metrics=metrics
        )
        np.random.seed(seed)  # reproducible random series
        idx = pd.period_range(start="2011-01-01", end="2012-01-01")
        df = pd.DataFrame(
            np.random.standard_normal((len(idx), 1)), index=idx, columns=["1"]
        )
        cv_pipeline.fit(df)
        # One results row per (model, metric) pair, four summary columns.
        assert cv_pipeline.cv_results_.shape == (
            len(cv_pipeline.model_list) * len(metrics),
            4,
        )
        y_pred = cv_pipeline.predict()
        horizon = cv_pipeline.best_model_.horizon
        # NOTE(review): a square (horizon, horizon) prediction (one row per
        # future step, one column per step-ahead) — confirm against
        # CVPipeline.predict's documented output shape.
        assert y_pred.shape == (horizon, horizon)

    @pytest.mark.parametrize(
        "models",
        [{
            TimeSeriesForecastingModel: {
                "features": [
                    [("s3", Shift(1), ["1"])],
                    [("ma10", MovingAverage(10), ["1"])],
                ],
                "horizon": [4],
                "model": [NaiveForecaster(), DriftForecaster()],
            }
        }],
    )
    @pytest.mark.parametrize("metrics", [{"RMSE": rmse, "MAE": mae}])
    @pytest.mark.parametrize("n_splits", [5])
    def test_model_assembly(self, models, n_splits, metrics):
        # Same pipeline flow, but the grid assembles full TimeSeriesForecastingModel
        # instances from feature/model combinations.
        cv_pipeline = CVPipeline(models_sets=models, n_splits=n_splits, metrics=metrics)
        idx = pd.period_range(start="2011-01-01", end="2012-01-01")
        df = pd.DataFrame(
            np.random.standard_normal((len(idx), 1)), index=idx, columns=["1"]
        )
        cv_pipeline.fit(df)
        assert cv_pipeline.cv_results_.shape == (
            len(cv_pipeline.model_list) * len(metrics),
            4,
        )
        y_pred = cv_pipeline.predict()
        horizon = cv_pipeline.best_model_.horizon
        assert y_pred.shape == (horizon, horizon)

    @pytest.mark.parametrize(
        "models", [{Naive: {"horizon": [3]}, AR: {"horizon": [3], "p": [2, 3]}}]
    )
    @pytest.mark.parametrize("refit", ["all", "best", ["Naive: {'horizon': 3}"]])
    def test_models_refit(self, models, refit):
        # refit can be 'all', 'best', or an explicit list of model labels.
        cv_pipeline = CVPipeline(models_sets=models)
        idx = pd.period_range(start="2011-01-01", end="2012-01-01")
        df = pd.DataFrame(
            np.random.standard_normal((len(idx), 1)), index=idx, columns=["1"]
        )
        cv_pipeline.fit(df, refit=refit)
        # Default metrics here: one results row per model.
        assert cv_pipeline.cv_results_.shape == (
            len(cv_pipeline.model_list),
            4,
        )
        y_pred = cv_pipeline.predict()
        horizon = cv_pipeline.best_model_.horizon
        assert y_pred.shape == (horizon, horizon)