Example No. 1
    def test_predict(self, tmp_path, use_pred_months, predict_delta):
        x, _, _ = _make_dataset(size=(5, 5), const=True)
        y = x.isel(time=[-1])

        train_features = tmp_path / "features/one_month_forecast/train/1980_1"
        train_features.mkdir(parents=True)

        test_features = tmp_path / "features/one_month_forecast/test/1980_1"
        test_features.mkdir(parents=True)

        norm_dict = {"VHI": {"mean": 0.0, "std": 1.0}}
        with (tmp_path / "features/one_month_forecast/normalizing_dict.pkl"
              ).open("wb") as f:
            pickle.dump(norm_dict, f)

        x.to_netcdf(test_features / "x.nc")
        y.to_netcdf(test_features / "y.nc")

        x.to_netcdf(train_features / "x.nc")
        y.to_netcdf(train_features / "y.nc")

        # static
        x_static, _, _ = _make_dataset(size=(5, 5), add_times=False)
        static_features = tmp_path / "features/static"
        static_features.mkdir(parents=True)
        x_static.to_netcdf(static_features / "data.nc")

        static_norm_dict = {"VHI": {"mean": 0.0, "std": 1.0}}
        with (tmp_path /
              "features/static/normalizing_dict.pkl").open("wb") as f:
            pickle.dump(static_norm_dict, f)

        dense_features = [10]
        hidden_size = 128
        rnn_dropout = 0.25

        model = RecurrentNetwork(
            hidden_size=hidden_size,
            dense_features=dense_features,
            rnn_dropout=rnn_dropout,
            data_folder=tmp_path,
            predict_delta=predict_delta,
        )
        model.train()
        test_arrays_dict, pred_dict = model.predict()

        # the foldername "1980_1" is the only one which should be in the dictionaries
        assert ("1980_1" in test_arrays_dict.keys()) and (len(test_arrays_dict)
                                                          == 1)
        assert ("1980_1" in pred_dict.keys()) and (len(pred_dict) == 1)

        if not predict_delta:
            # _make_dataset with const=True returns all ones
            assert (test_arrays_dict["1980_1"]["y"] == 1).all()
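The assertions above fix the shape of model.predict()'s output: both returned dictionaries are keyed by the test folder name. A minimal sketch of consuming that output (the "1980_1" folder name and the "y" key are taken from the test setup above; "model" is the trained RecurrentNetwork instance):

# Sketch only: inspect the predict() output produced in the test above.
# "1980_1" is the single test folder written to disk, so it is the only key.
test_arrays_dict, pred_dict = model.predict()
assert set(test_arrays_dict) == set(pred_dict) == {"1980_1"}
true_y = test_arrays_dict["1980_1"]["y"]  # targets saved to y.nc above
preds = pred_dict["1980_1"]               # model output for the same folder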
Example No. 2
def rnn(
    experiment="one_month_forecast",
    include_pred_month=True,
    surrounding_pixels=None,
    ignore_vars=None,
    include_static=True,
):
    # if the working directory is already ml_drought we don't need ../data
    if Path(".").absolute().as_posix().split("/")[-1] == "ml_drought":
        data_path = Path("data")
    else:
        data_path = Path("../data")

    predictor = RecurrentNetwork(
        hidden_size=128,
        data_folder=data_path,
        experiment=experiment,
        include_pred_month=include_pred_month,
        surrounding_pixels=surrounding_pixels,
        ignore_vars=ignore_vars,
        include_static=include_static,
    )
    predictor.train(num_epochs=50, early_stopping=5)
    predictor.evaluate(save_preds=True)
    predictor.save_model()
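Called with its defaults this helper trains the one-month-forecast LSTM end to end; any keyword can be overridden at the call site. An illustrative invocation (the "precip" variable name follows the ignore_vars usage in Example No. 8 below; all values here are examples, not project defaults):

# Illustrative call of the rnn() helper defined above.
rnn(
    experiment="one_month_forecast",
    surrounding_pixels=1,
    ignore_vars=["precip"],   # drop a predictor variable by name
    include_static=False,
)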
Example No. 3
    def test_train(self, tmp_path, capsys, use_pred_months, predict_delta,
                   static):
        x, _, _ = _make_dataset(size=(5, 5), const=True)
        y = x.isel(time=[-1])

        test_features = tmp_path / "features/one_month_forecast/train/1980_1"
        test_features.mkdir(parents=True)

        norm_dict = {"VHI": {"mean": 0, "std": 1}}
        with (tmp_path / "features/one_month_forecast/normalizing_dict.pkl"
              ).open("wb") as f:
            pickle.dump(norm_dict, f)

        x.to_netcdf(test_features / "x.nc")
        y.to_netcdf(test_features / "y.nc")

        # static
        x_static, _, _ = _make_dataset(size=(5, 5), add_times=False)
        static_features = tmp_path / "features/static"
        static_features.mkdir(parents=True)
        x_static.to_netcdf(static_features / "data.nc")

        static_norm_dict = {"VHI": {"mean": 0.0, "std": 1.0}}
        with (tmp_path /
              "features/static/normalizing_dict.pkl").open("wb") as f:
            pickle.dump(static_norm_dict, f)

        dense_features = [10]
        hidden_size = 128
        rnn_dropout = 0.25

        model = RecurrentNetwork(
            hidden_size=hidden_size,
            dense_features=dense_features,
            rnn_dropout=rnn_dropout,
            data_folder=tmp_path,
            include_monthly_aggs=True,
            predict_delta=predict_delta,
            static=static,
        )
        check_inversion = False
        model.train(check_inversion=check_inversion)

        captured = capsys.readouterr()
        expected_stdout = "Epoch 1, train smooth L1:"
        assert expected_stdout in captured.out

        assert type(model.model) == RNN, "Model attribute not an RNN!"
Example No. 4
def run_models(data_path, experiment):
    # NOTE: this model is the same for all experiments
    print("Running persistence model")
    predictor = Persistence(data_path, experiment=experiment)
    predictor.evaluate(save_preds=True)

    # linear regression
    print(f"Running Linear Regression model: {experiment}")
    predictor = LinearRegression(data_path,
                                 experiment=experiment,
                                 include_pred_month=True,
                                 surrounding_pixels=1)
    predictor.train(num_epochs=10, early_stopping=3)

    # linear network
    print(f"Running Linear Neural Network model: {experiment}")
    predictor = LinearNetwork(
        data_folder=data_path,
        experiment=experiment,
        layer_sizes=[100],
        include_pred_month=True,
        surrounding_pixels=1,
    )
    predictor.train(num_epochs=10, early_stopping=3)
    predictor.evaluate(save_preds=True)
    predictor.save_model()

    # recurrent network
    print(f"Running RNN (LSTM) model: {experiment}")
    predictor = RecurrentNetwork(
        data_folder=data_path,
        hidden_size=128,
        experiment=experiment,
        include_pred_month=True,
        surrounding_pixels=1,
    )
    predictor.train(num_epochs=10, early_stopping=3)
    predictor.evaluate(save_preds=True)
    predictor.save_model()

    # EA LSTM
    print(f"Running Entity Aware LSTM model: {experiment}")
    predictor = EARecurrentNetwork(
        data_folder=data_path,
        hidden_size=128,
        experiment=experiment,
        include_pred_month=True,
        surrounding_pixels=1,
    )
    predictor.train(num_epochs=10, early_stopping=3)
    predictor.evaluate(save_preds=True)
    predictor.save_model()
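A minimal driver for this helper; the data-directory resolution below mirrors the rnn() helpers elsewhere in these examples, and "one_month_forecast" is the experiment name used throughout:

from pathlib import Path

# Sketch only: resolve the data folder the same way the rnn() helpers do,
# then run every baseline and neural model for one experiment.
data_path = Path("data") if Path(".").absolute().name == "ml_drought" else Path("../data")
run_models(data_path, experiment="one_month_forecast")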
Example No. 5
def rnn(experiment='one_month_forecast',
        include_pred_month=True,
        surrounding_pixels=1):
    # if the working directory is already ml_drought we don't need ../data
    if Path('.').absolute().as_posix().split('/')[-1] == 'ml_drought':
        data_path = Path('data')
    else:
        data_path = Path('../data')

    predictor = RecurrentNetwork(hidden_size=128,
                                 data_folder=data_path,
                                 experiment=experiment,
                                 include_pred_month=include_pred_month,
                                 surrounding_pixels=surrounding_pixels)
    predictor.train(num_epochs=50, early_stopping=5)
    predictor.evaluate(save_preds=True)
    predictor.save_model()

    _ = predictor.explain(save_shap_values=True)
Example No. 6
def rnn(
    experiment="one_month_forecast",
    include_pred_month=True,
    surrounding_pixels=None,
    explain=False,
    static="features",
    ignore_vars=None,
    num_epochs=50,
    early_stopping=5,
    hidden_size=128,
    predict_delta=False,
    spatial_mask=None,
    include_latlons=False,
    normalize_y=True,
    include_prev_y=True,
    include_yearly_aggs=True,
    clear_nans=True,
    weight_observations=False,
):
    predictor = RecurrentNetwork(
        hidden_size=hidden_size,
        data_folder=get_data_path(),
        experiment=experiment,
        include_pred_month=include_pred_month,
        surrounding_pixels=surrounding_pixels,
        static=static,
        ignore_vars=ignore_vars,
        predict_delta=predict_delta,
        spatial_mask=spatial_mask,
        include_latlons=include_latlons,
        normalize_y=normalize_y,
        include_prev_y=include_prev_y,
        include_yearly_aggs=include_yearly_aggs,
        clear_nans=clear_nans,
        weight_observations=weight_observations,
    )
    predictor.train(num_epochs=num_epochs, early_stopping=early_stopping)
    predictor.evaluate(save_preds=True)
    predictor.save_model()

    if explain:
        _ = predictor.explain(save_shap_values=True)
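This variant exposes most of the RecurrentNetwork constructor as keyword arguments, so one call configures the whole run. An illustrative invocation (every keyword appears in the signature above; the chosen values are examples only, not recommended settings):

# Illustrative call; all keywords are forwarded to RecurrentNetwork /
# predictor.train() exactly as in the helper above.
rnn(
    experiment="one_month_forecast",
    include_latlons=True,
    predict_delta=True,
    explain=True,        # also saves SHAP values via predictor.explain()
    num_epochs=50,
    early_stopping=5,
)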
Example No. 7
def rnn(
    experiment="one_month_forecast",
    include_pred_month=True,
    surrounding_pixels=None,
    ignore_vars=None,
    pretrained=True,
):
    predictor = RecurrentNetwork(
        hidden_size=128,
        data_folder=get_data_path(),
        experiment=experiment,
        include_pred_month=include_pred_month,
        surrounding_pixels=surrounding_pixels,
        ignore_vars=ignore_vars,
    )
    predictor.train(num_epochs=50, early_stopping=5)
    predictor.evaluate(save_preds=True)
    predictor.save_model()

    _ = predictor.explain(save_shap_values=True)
Example No. 8
    def test_save(self, tmp_path, monkeypatch):

        features_per_month = 5
        dense_features = [10]
        input_dense_features = copy(dense_features)
        hidden_size = 128
        rnn_dropout = 0.25
        include_pred_month = True
        experiment = "one_month_forecast"
        ignore_vars = ["precip"]
        include_latlons = True
        include_prev_y = True
        normalize_y = False

        def mocktrain(self):
            self.model = RNN(
                features_per_month,
                dense_features,
                hidden_size,
                rnn_dropout,
                include_pred_month,
                include_latlons,
                experiment="one_month_forecast",
                include_prev_y=include_prev_y,
            )
            self.features_per_month = features_per_month

        monkeypatch.setattr(RecurrentNetwork, "train", mocktrain)

        model = RecurrentNetwork(
            hidden_size=hidden_size,
            dense_features=dense_features,
            rnn_dropout=rnn_dropout,
            data_folder=tmp_path,
            ignore_vars=ignore_vars,
            experiment=experiment,
            include_pred_month=include_pred_month,
            include_latlons=include_latlons,
            include_prev_y=include_prev_y,
            normalize_y=normalize_y,
        )

        model.train()
        model.save_model()

        assert (tmp_path / "models/one_month_forecast/rnn/model.pt"
                ).exists(), f"Model not saved!"

        model_dict = torch.load(model.model_dir / "model.pt",
                                map_location="cpu")

        for key, val in model_dict["model"]["state_dict"].items():
            assert (model.model.state_dict()[key] == val).all()

        assert model_dict["model"]["features_per_month"] == features_per_month
        assert model_dict["hidden_size"] == hidden_size
        assert model_dict["rnn_dropout"] == rnn_dropout
        assert model_dict["dense_features"] == input_dense_features
        assert model_dict["include_pred_month"] == include_pred_month
        assert model_dict["experiment"] == experiment
        assert model_dict["ignore_vars"] == ignore_vars
        assert model_dict["include_latlons"] == include_latlons
        assert model_dict["include_prev_y"] == include_prev_y
        assert model_dict["normalize_y"] == normalize_y
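The keys asserted at the end of this test document the layout of the saved checkpoint. A minimal sketch of restoring a model from it, assuming RNN is a standard torch.nn.Module so load_state_dict applies; the positional argument order mirrors the mocktrain() call above, and model_path is a placeholder for .../models/one_month_forecast/rnn/model.pt:

import torch

# Sketch only: rebuild the network from the checkpoint keys asserted above.
model_dict = torch.load(model_path, map_location="cpu")
rnn_model = RNN(
    model_dict["model"]["features_per_month"],
    model_dict["dense_features"],
    model_dict["hidden_size"],
    model_dict["rnn_dropout"],
    model_dict["include_pred_month"],
    model_dict["include_latlons"],
    experiment=model_dict["experiment"],
    include_prev_y=model_dict["include_prev_y"],
)
rnn_model.load_state_dict(model_dict["model"]["state_dict"])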