Пример #1
0
def test_from_data_frame_time_series_data_set_multi_call(patch_time_series_data_set):
    """Check the ``TimeSeriesDataSet`` calls made when each stage is loaded by its own ``from_data_frame`` call.

    The training call is expected to construct ``TimeSeriesDataSet`` directly, whereas the validation call, which
    only receives the parameters of the training datamodule, is expected to go through ``from_parameters``.
    """
    mocked_data_set = patch_time_series_data_set.return_value
    mocked_data_set.get_parameters.return_value = {"test": None}

    train_frame, val_frame = MagicMock(), MagicMock()

    fitted_datamodule = TabularForecastingData.from_data_frame(
        "time_idx",
        "target",
        ["series"],
        train_data_frame=train_frame,
        additional_kwarg="test",
        batch_size=4,
    )

    # The validation stage is built only from the parameters exposed by the training datamodule.
    TabularForecastingData.from_data_frame(
        val_data_frame=val_frame,
        parameters=fitted_datamodule.parameters,
        batch_size=4,
    )

    expected_constructor_kwargs = {
        "time_idx": "time_idx",
        "group_ids": ["series"],
        "target": "target",
        "additional_kwarg": "test",
    }
    patch_time_series_data_set.assert_called_once_with(train_frame, **expected_constructor_kwargs)

    from_parameters_mock = patch_time_series_data_set.from_parameters
    from_parameters_mock.assert_called_once_with({"test": None}, val_frame, stop_randomization=True)
Пример #2
0
def test_from_data_frame_misconfiguration():
    """Check that building ``TabularForecastingData`` from validation data alone, with no ``parameters`` given,
    raises a ``MisconfigurationException``."""
    expected_message = "evaluation or inference requires parameters"

    with pytest.raises(MisconfigurationException, match=expected_message):
        # Only a validation frame is supplied: no training frame and no ``parameters``.
        TabularForecastingData.from_data_frame(
            "time_idx",
            "target",
            ["series"],
            batch_size=4,
            additional_kwarg="test",
            val_data_frame=MagicMock(),
        )
Пример #3
0
def test_testing_raises(sample_data):
    """Check that running ``trainer.test`` with an ``n_beats`` forecaster raises ``NotImplementedError``."""
    frame, cutoff, horizon = sample_data

    series_encoder = NaNLabelEncoder().fit(frame.series)
    # Train only on rows up to the cutoff; the full frame is used as test data.
    train_frame = frame[frame.time_idx <= cutoff]

    datamodule = TabularForecastingData.from_data_frame(
        train_data_frame=train_frame,
        test_data_frame=frame,
        time_idx="time_idx",
        target="value",
        group_ids=["series"],
        categorical_encoders={"series": series_encoder},
        time_varying_unknown_reals=["value"],
        max_encoder_length=60,
        max_prediction_length=horizon,
        batch_size=4,
    )

    n_beats_kwargs = {"widths": [32, 512], "backcast_loss_ratio": 0.1}
    model = TabularForecaster(datamodule.parameters, backbone="n_beats", backbone_kwargs=n_beats_kwargs)
    trainer = flash.Trainer(max_epochs=1, fast_dev_run=True, gradient_clip_val=0.01)

    expected_message = "Backbones provided by PyTorch Forecasting don't support testing."
    # Only the test pass itself is expected to raise; the setup above must succeed.
    with pytest.raises(NotImplementedError, match=expected_message):
        trainer.test(model, datamodule=datamodule)
Пример #4
0
def test_fast_dev_run_smoke(sample_data):
    """Smoke test: fit an ``n_beats`` forecaster on the sample data with ``fast_dev_run`` enabled."""
    frame, cutoff, horizon = sample_data

    series_encoder = NaNLabelEncoder().fit(frame.series)
    # Train only on rows up to the cutoff; validate on the full frame.
    train_frame = frame[frame.time_idx <= cutoff]

    datamodule = TabularForecastingData.from_data_frame(
        train_data_frame=train_frame,
        val_data_frame=frame,
        time_idx="time_idx",
        target="value",
        group_ids=["series"],
        categorical_encoders={"series": series_encoder},
        time_varying_unknown_reals=["value"],
        max_encoder_length=60,
        max_prediction_length=horizon,
        batch_size=4,
    )

    n_beats_kwargs = {"widths": [32, 512], "backcast_loss_ratio": 0.1}
    model = TabularForecaster(datamodule.parameters, backbone="n_beats", backbone_kwargs=n_beats_kwargs)

    trainer = flash.Trainer(max_epochs=1, fast_dev_run=True, gradient_clip_val=0.01)
    trainer.fit(model, datamodule=datamodule)
# Adapted from the PyTorch Forecasting tutorial at
# https://pytorch-forecasting.readthedocs.io/en/latest/tutorials/ar.html
# 1. Create the DataModule
data = generate_ar_data(
    seasonality=10.0,
    timesteps=400,
    n_series=100,
    seed=42,
)
# Derive a calendar date from each time index: one day per step, starting at 2020-01-01.
data["date"] = pd.to_timedelta(data["time_idx"], unit="D") + pd.Timestamp("2020-01-01")

max_prediction_length = 20

# Rows after the cutoff (the last ``max_prediction_length`` steps) are kept out of the training frame.
training_cutoff = data.time_idx.max() - max_prediction_length

datamodule = TabularForecastingData.from_data_frame(
    train_data_frame=data[data.time_idx <= training_cutoff],
    val_data_frame=data,
    time_idx="time_idx",
    target="value",
    group_ids=["series"],
    categorical_encoders={"series": NaNLabelEncoder().fit(data.series)},
    # "value" is the only unknown variable, and N-Beats cannot take any additional variables
    time_varying_unknown_reals=["value"],
    max_encoder_length=60,
    max_prediction_length=max_prediction_length,
    batch_size=32,
)

# 2. Build the task
model = TabularForecaster(
    datamodule.parameters,
    backbone="n_beats",
    backbone_kwargs={
        "widths": [32, 512],
        "backcast_loss_ratio": 0.1
    },