示例#1
0
def earnn(
    experiment="one_month_forecast",
    include_pred_month=True,
    surrounding_pixels=None,
    pretrained=True,
    ignore_vars=None,
):
    data_path = get_data_path()

    if not pretrained:
        predictor = EARecurrentNetwork(
            hidden_size=128,
            data_folder=data_path,
            experiment=experiment,
            include_pred_month=include_pred_month,
            surrounding_pixels=surrounding_pixels,
            ignore_vars=ignore_vars,
        )
        predictor.train(num_epochs=50, early_stopping=5)
        predictor.evaluate(save_preds=True)
        predictor.save_model()
    else:
        predictor = load_model(data_path /
                               f"models/{experiment}/ealstm/model.pt")

    test_file = data_path / f"features/{experiment}/test/2018_3"
    assert test_file.exists()
    all_explanations_for_file(test_file, predictor, batch_size=100)
def earnn(
    experiment="one_month_forecast",
    include_pred_month=True,
    surrounding_pixels=None,
    pretrained=True,
    ignore_vars=None,
    include_static=True,
):
    # if the working directory is alread ml_drought don't need ../data
    if Path(".").absolute().as_posix().split("/")[-1] == "ml_drought":
        data_path = Path("data")
    else:
        data_path = Path("../data")

    if not pretrained:
        predictor = EARecurrentNetwork(
            hidden_size=128,
            data_folder=data_path,
            experiment=experiment,
            include_pred_month=include_pred_month,
            surrounding_pixels=surrounding_pixels,
            ignore_vars=ignore_vars,
            include_static=include_static,
        )
        predictor.train(num_epochs=50, early_stopping=10)
        predictor.evaluate(save_preds=True)
        predictor.save_model()
    else:
        predictor = load_model(data_path /
                               f"models/{experiment}/ealstm/model.pt")
示例#3
0
def earnn(experiment='one_month_forecast',
          include_pred_month=True,
          surrounding_pixels=None,
          pretrained=True):
    # if the working directory is alread ml_drought don't need ../data
    if Path('.').absolute().as_posix().split('/')[-1] == 'ml_drought':
        data_path = Path('data')
    else:
        data_path = Path('../data')

    if not pretrained:
        predictor = EARecurrentNetwork(hidden_size=128,
                                       data_folder=data_path,
                                       experiment=experiment,
                                       include_pred_month=include_pred_month,
                                       surrounding_pixels=surrounding_pixels)
        predictor.train(num_epochs=50, early_stopping=5)
        predictor.evaluate(save_preds=True)
        predictor.save_model()
    else:
        predictor = load_model(data_path /
                               f'models/{experiment}/ealstm/model.pt')

    test_file = data_path / f'features/{experiment}/test/2018_3'
    assert test_file.exists()
    all_shap_for_file(test_file, predictor, batch_size=100)
示例#4
0
    def test_predict(self, tmp_path, use_pred_months):
        x, _, _ = _make_dataset(size=(5, 5), const=True)
        y = x.isel(time=[-1])

        train_features = tmp_path / 'features/one_month_forecast/train/hello'
        train_features.mkdir(parents=True)

        test_features = tmp_path / 'features/one_month_forecast/test/hello'
        test_features.mkdir(parents=True)

        norm_dict = {'VHI': {'mean': 0.0, 'std': 1.0}}
        with (tmp_path / 'features/one_month_forecast/normalizing_dict.pkl'
              ).open('wb') as f:
            pickle.dump(norm_dict, f)

        x.to_netcdf(test_features / 'x.nc')
        y.to_netcdf(test_features / 'y.nc')

        x.to_netcdf(train_features / 'x.nc')
        y.to_netcdf(train_features / 'y.nc')

        # static
        x_static, _, _ = _make_dataset(size=(5, 5), add_times=False)
        static_features = tmp_path / f'features/static'
        static_features.mkdir(parents=True)
        x_static.to_netcdf(static_features / 'data.nc')

        static_norm_dict = {'VHI': {'mean': 0.0, 'std': 1.0}}
        with (tmp_path /
              f'features/static/normalizing_dict.pkl').open('wb') as f:
            pickle.dump(static_norm_dict, f)

        dense_features = [10]
        hidden_size = 128
        rnn_dropout = 0.25

        model = EARecurrentNetwork(hidden_size=hidden_size,
                                   dense_features=dense_features,
                                   rnn_dropout=rnn_dropout,
                                   data_folder=tmp_path)
        model.train()
        test_arrays_dict, pred_dict = model.predict()

        # the foldername "hello" is the only one which should be in the dictionaries
        assert ('hello' in test_arrays_dict.keys()) and (len(test_arrays_dict)
                                                         == 1)
        assert ('hello' in pred_dict.keys()) and (len(pred_dict) == 1)

        # _make_dataset with const=True returns all ones
        assert (test_arrays_dict['hello']['y'] == 1).all()
示例#5
0
    def test_train(self, tmp_path, capsys, use_pred_months,
                   use_static_embedding):
        x, _, _ = _make_dataset(size=(5, 5), const=True)
        y = x.isel(time=[-1])

        test_features = tmp_path / "features/one_month_forecast/train/1980_1"
        test_features.mkdir(parents=True)

        norm_dict = {"VHI": {"mean": 0, "std": 1}}
        with (tmp_path / "features/one_month_forecast/normalizing_dict.pkl"
              ).open("wb") as f:
            pickle.dump(norm_dict, f)

        x.to_netcdf(test_features / "x.nc")
        y.to_netcdf(test_features / "y.nc")

        # static
        x_static, _, _ = _make_dataset(size=(5, 5), add_times=False)
        static_features = tmp_path / f"features/static"
        static_features.mkdir(parents=True)
        x_static.to_netcdf(static_features / "data.nc")

        static_norm_dict = {"VHI": {"mean": 0.0, "std": 1.0}}
        with (tmp_path /
              f"features/static/normalizing_dict.pkl").open("wb") as f:
            pickle.dump(static_norm_dict, f)

        dense_features = [10]
        hidden_size = 128
        rnn_dropout = 0.25

        model = EARecurrentNetwork(
            hidden_size=hidden_size,
            dense_features=dense_features,
            rnn_dropout=rnn_dropout,
            data_folder=tmp_path,
            static_embedding_size=use_static_embedding,
            normalize_y=True,
        )
        model.train()

        captured = capsys.readouterr()
        expected_stdout = "Epoch 1, train smooth L1: 0."
        assert expected_stdout in captured.out

        assert type(model.model) == EALSTM, f"Model attribute not an EALSTM!"
示例#6
0
def earnn(
    experiment="one_month_forecast",
    include_pred_month=True,
    surrounding_pixels=None,
    pretrained=False,
    explain=False,
    static="features",
    ignore_vars=None,
    num_epochs=50,
    early_stopping=5,
    static_embedding_size=10,
    hidden_size=128,
    predict_delta=False,
    spatial_mask=None,
    include_latlons=False,
    normalize_y=True,
    include_prev_y=True,
    include_yearly_aggs=True,  # new
    clear_nans=True,
    weight_observations=False,
    pred_month_static=False,
):
    data_path = get_data_path()

    if not pretrained:
        predictor = EARecurrentNetwork(
            hidden_size=hidden_size,
            data_folder=data_path,
            experiment=experiment,
            include_pred_month=include_pred_month,
            surrounding_pixels=surrounding_pixels,
            static=static,
            static_embedding_size=static_embedding_size,
            ignore_vars=ignore_vars,
            predict_delta=predict_delta,
            spatial_mask=spatial_mask,
            include_latlons=include_latlons,
            normalize_y=normalize_y,
            include_prev_y=include_prev_y,
            include_yearly_aggs=include_yearly_aggs,
            clear_nans=clear_nans,
            weight_observations=weight_observations,
            pred_month_static=pred_month_static,
        )
        predictor.train(num_epochs=num_epochs, early_stopping=early_stopping)
        predictor.evaluate(save_preds=True)
        predictor.save_model()
    else:
        predictor = load_model(data_path /
                               f"models/{experiment}/ealstm/model.pt")

    if explain:
        test_file = data_path / f"features/{experiment}/test/2018_3"
        assert test_file.exists()
        all_explanations_for_file(test_file, predictor, batch_size=100)
示例#7
0
    def test_train(self, tmp_path, capsys, use_pred_months):
        x, _, _ = _make_dataset(size=(5, 5), const=True)
        y = x.isel(time=[-1])

        test_features = tmp_path / 'features/one_month_forecast/train/hello'
        test_features.mkdir(parents=True)

        norm_dict = {'VHI': {'mean': 0, 'std': 1}}
        with (tmp_path / 'features/one_month_forecast/normalizing_dict.pkl'
              ).open('wb') as f:
            pickle.dump(norm_dict, f)

        x.to_netcdf(test_features / 'x.nc')
        y.to_netcdf(test_features / 'y.nc')

        # static
        x_static, _, _ = _make_dataset(size=(5, 5), add_times=False)
        static_features = tmp_path / f'features/static'
        static_features.mkdir(parents=True)
        x_static.to_netcdf(static_features / 'data.nc')

        static_norm_dict = {'VHI': {'mean': 0.0, 'std': 1.0}}
        with (tmp_path /
              f'features/static/normalizing_dict.pkl').open('wb') as f:
            pickle.dump(static_norm_dict, f)

        dense_features = [10]
        hidden_size = 128
        rnn_dropout = 0.25

        model = EARecurrentNetwork(hidden_size=hidden_size,
                                   dense_features=dense_features,
                                   rnn_dropout=rnn_dropout,
                                   data_folder=tmp_path)
        model.train()

        captured = capsys.readouterr()
        expected_stdout = 'Epoch 1, train smooth L1: 0.'
        assert expected_stdout in captured.out

        assert type(model.model) == EALSTM, \
            f'Model attribute not an EALSTM!'
示例#8
0
def run_models(data_path, experiment):
    # NOTE: this model is the same for all experiments
    print("Running persistence model")
    predictor = Persistence(data_path, experiment=experiment)
    predictor.evaluate(save_preds=True)

    # linear regression
    print(f"Running Linear Regression model: {experiment}")
    predictor = LinearRegression(data_path,
                                 experiment=experiment,
                                 include_pred_month=True,
                                 surrounding_pixels=1)
    predictor.train(num_epochs=10, early_stopping=3)

    # linear network
    print(f"Running Linear Neural Network model: {experiment}")
    predictor = LinearNetwork(
        data_folder=data_path,
        experiment=experiment,
        layer_sizes=[100],
        include_pred_month=True,
        surrounding_pixels=1,
    )
    predictor.train(num_epochs=10, early_stopping=3)
    predictor.evaluate(save_preds=True)
    predictor.save_model()

    # recurrent network
    print(f"Running RNN (LSTM) model: {experiment}")
    predictor = RecurrentNetwork(
        data_folder=data_path,
        hidden_size=128,
        experiment=experiment,
        include_pred_month=True,
        surrounding_pixels=1,
    )
    predictor.train(num_epochs=10, early_stopping=3)
    predictor.evaluate(save_preds=True)
    predictor.save_model()

    # EA LSTM
    print(f"Running Entity Aware LSTM model: {experiment}")
    predictor = EARecurrentNetwork(
        data_folder=data_path,
        hidden_size=128,
        experiment=experiment,
        include_pred_month=True,
        surrounding_pixels=1,
    )
    predictor.train(num_epochs=10, early_stopping=3)
    predictor.evaluate(save_preds=True)
    predictor.save_model()
示例#9
0
    def test_save(self, tmp_path, monkeypatch):

        features_per_month = 5
        dense_features = [10]
        input_dense_features = copy(dense_features)
        hidden_size = 128
        rnn_dropout = 0.25
        include_latlons = True
        include_pred_month = True
        include_yearly_aggs = True
        yearly_agg_size = 3

        def mocktrain(self):
            self.model = EALSTM(features_per_month,
                                dense_features,
                                hidden_size,
                                rnn_dropout,
                                include_latlons,
                                include_pred_month,
                                experiment='one_month_forecast',
                                yearly_agg_size=yearly_agg_size)
            self.features_per_month = features_per_month
            self.yearly_agg_size = yearly_agg_size

        monkeypatch.setattr(EARecurrentNetwork, 'train', mocktrain)

        model = EARecurrentNetwork(hidden_size=hidden_size,
                                   dense_features=dense_features,
                                   include_pred_month=include_pred_month,
                                   include_latlons=include_latlons,
                                   rnn_dropout=rnn_dropout,
                                   data_folder=tmp_path,
                                   include_yearly_aggs=include_yearly_aggs)
        model.train()
        model.save_model()

        assert (tmp_path / 'models/one_month_forecast/ealstm/model.pt').exists(), \
            f'Model not saved!'

        model_dict = torch.load(model.model_dir / 'model.pt',
                                map_location='cpu')

        for key, val in model_dict['model']['state_dict'].items():
            assert (model.model.state_dict()[key] == val).all()

        assert model_dict['model']['features_per_month'] == features_per_month
        assert model_dict['model']['yearly_agg_size'] == yearly_agg_size
        assert model_dict['hidden_size'] == hidden_size
        assert model_dict['rnn_dropout'] == rnn_dropout
        assert model_dict['dense_features'] == input_dense_features
        assert model_dict['include_pred_month'] == include_pred_month
        assert model_dict['include_latlons'] == include_latlons
        assert model_dict['include_yearly_aggs'] == include_yearly_aggs
        assert model_dict['experiment'] == 'one_month_forecast'
示例#10
0
    def test_save(self, tmp_path, monkeypatch):

        features_per_month = 5
        dense_features = [10]
        input_dense_features = copy(dense_features)
        hidden_size = 128
        rnn_dropout = 0.25
        include_latlons = True
        include_pred_month = True
        include_yearly_aggs = True
        yearly_agg_size = 3
        include_prev_y = True
        normalize_y = False

        def mocktrain(self):
            self.model = EALSTM(
                features_per_month,
                dense_features,
                hidden_size,
                rnn_dropout,
                include_latlons,
                include_pred_month,
                experiment="one_month_forecast",
                yearly_agg_size=yearly_agg_size,
                include_prev_y=include_prev_y,
            )
            self.features_per_month = features_per_month
            self.yearly_agg_size = yearly_agg_size

        monkeypatch.setattr(EARecurrentNetwork, "train", mocktrain)

        model = EARecurrentNetwork(
            hidden_size=hidden_size,
            dense_features=dense_features,
            include_pred_month=include_pred_month,
            include_latlons=include_latlons,
            rnn_dropout=rnn_dropout,
            data_folder=tmp_path,
            include_yearly_aggs=include_yearly_aggs,
            normalize_y=normalize_y,
        )
        model.train()
        model.save_model()

        assert (tmp_path / "models/one_month_forecast/ealstm/model.pt"
                ).exists(), f"Model not saved!"

        model_dict = torch.load(model.model_dir / "model.pt",
                                map_location="cpu")

        for key, val in model_dict["model"]["state_dict"].items():
            assert (model.model.state_dict()[key] == val).all()

        assert model_dict["model"]["features_per_month"] == features_per_month
        assert model_dict["model"]["yearly_agg_size"] == yearly_agg_size
        assert model_dict["hidden_size"] == hidden_size
        assert model_dict["rnn_dropout"] == rnn_dropout
        assert model_dict["dense_features"] == input_dense_features
        assert model_dict["include_pred_month"] == include_pred_month
        assert model_dict["include_latlons"] == include_latlons
        assert model_dict["include_yearly_aggs"] == include_yearly_aggs
        assert model_dict["experiment"] == "one_month_forecast"
        assert model_dict["include_prev_y"] == include_prev_y
        assert model_dict["normalize_y"] == normalize_y
示例#11
0
    def test_predict_and_explain(self, tmp_path, use_pred_months,
                                 predict_delta):
        x, _, _ = _make_dataset(size=(5, 5), const=True)
        y = x.isel(time=[-1])

        train_features = tmp_path / "features/one_month_forecast/train/1980_1"
        train_features.mkdir(parents=True)

        test_features = tmp_path / "features/one_month_forecast/test/1980_1"
        test_features.mkdir(parents=True)

        norm_dict = {"VHI": {"mean": 0.0, "std": 1.0}}
        with (tmp_path / "features/one_month_forecast/normalizing_dict.pkl"
              ).open("wb") as f:
            pickle.dump(norm_dict, f)

        x.to_netcdf(test_features / "x.nc")
        y.to_netcdf(test_features / "y.nc")

        x.to_netcdf(train_features / "x.nc")
        y.to_netcdf(train_features / "y.nc")

        # static
        x_static, _, _ = _make_dataset(size=(5, 5), add_times=False)
        static_features = tmp_path / f"features/static"
        static_features.mkdir(parents=True)
        x_static.to_netcdf(static_features / "data.nc")

        static_norm_dict = {"VHI": {"mean": 0.0, "std": 1.0}}
        with (tmp_path /
              f"features/static/normalizing_dict.pkl").open("wb") as f:
            pickle.dump(static_norm_dict, f)

        dense_features = [10]
        hidden_size = 128
        rnn_dropout = 0.25

        model = EARecurrentNetwork(
            hidden_size=hidden_size,
            dense_features=dense_features,
            rnn_dropout=rnn_dropout,
            data_folder=tmp_path,
            predict_delta=predict_delta,
            normalize_y=True,
        )
        model.train()
        test_arrays_dict, pred_dict = model.predict()

        # the foldername "1980_1" is the only one which should be in the dictionaries
        assert ("1980_1" in test_arrays_dict.keys()) and (len(test_arrays_dict)
                                                          == 1)
        assert ("1980_1" in pred_dict.keys()) and (len(pred_dict) == 1)

        if not predict_delta:
            # _make_dataset with const=True returns all ones
            assert (test_arrays_dict["1980_1"]["y"] == 1).all()
        else:
            # _make_dataset with const=True & predict_delta
            # returns a change of 0
            assert (test_arrays_dict["1980_1"]["y"] == 0).all()

        # test the Morris explanation works
        test_dl = next(
            iter(
                model.get_dataloader(mode="test",
                                     to_tensor=True,
                                     shuffle_data=False)))

        for key, val in test_dl.items():
            output_m = model.explain(val.x,
                                     save_explanations=True,
                                     method="morris")
            assert type(output_m) is TrainData
            assert (model.model_dir /
                    "analysis/morris_value_historical.npy").exists()
示例#12
0
    def test_train(
        self,
        tmp_path,
        capsys,
        use_pred_months,
        use_static_embedding,
        static,
        check_inversion,
    ):
        # make directories
        for ts in ["2001_11", "2001_12"]:
            test_features = tmp_path / f"features/one_month_forecast/train/{ts}"
            test_features.mkdir(parents=True)

        norm_dict = {"VHI": {"mean": 0, "std": 1}}
        with (tmp_path / "features/one_month_forecast/normalizing_dict.pkl").open(
            "wb"
        ) as f:
            pickle.dump(norm_dict, f)

        # save the X, y data pairs
        x, _, _ = _make_dataset(size=(5, 5), const=True)

        for ts in ["2001_11", "2001_12"]:
            if ts == "2001_12":
                y = x.sel(time="2001-12")
                x_save = x.sel(time=slice("2000-12", "2001-11"))
            else:
                y = x.sel(time="2001-11")
                x_save = x.sel(time=slice("2000-11", "2001-10"))
            x_save.to_netcdf(test_features / "x.nc")
            y.to_netcdf(test_features / "y.nc")

        # static
        x_static, _, _ = _make_dataset(size=(5, 5), add_times=False)
        static_features = tmp_path / f"features/static"
        static_features.mkdir(parents=True)
        x_static.to_netcdf(static_features / "data.nc")

        static_norm_dict = {"VHI": {"mean": 0.0, "std": 1.0}}
        with (tmp_path / f"features/static/normalizing_dict.pkl").open("wb") as f:
            pickle.dump(static_norm_dict, f)

        dense_features = [10]
        hidden_size = 128
        rnn_dropout = 0.25

        model = EARecurrentNetwork(
            hidden_size=hidden_size,
            dense_features=dense_features,
            rnn_dropout=rnn_dropout,
            data_folder=tmp_path,
            static_embedding_size=use_static_embedding,
            normalize_y=True,
            include_yearly_aggs=False,
            static=static,
        )
        model.train(check_inversion=check_inversion)

        captured = capsys.readouterr()
        expected_stdout = "Epoch 1, train smooth L1: 0."
        assert expected_stdout in captured.out

        assert type(model.model) == EALSTM, f"Model attribute not an EALSTM!"

        # ------------------
        # Check static embedding
        # -------------------
        if use_static_embedding is not None:
            all_e, (all_static_x, all_latlons, all_pred_months) = get_static_embedding(
                ealstm=model
            )
            assert (
                all_e[0].shape[0] == 25
            ), f"Expect 25 latlon values (pixels). Got: {all_e[0].shape}"
            assert (
                all_latlons[0].shape[0] == 25
            ), f"Expect 25 latlon values (pixels). Got: {all_e[0].shape}"

            # Moved the PredMonth OHE to the dynamic data
            assert all_static_x[0].shape == (
                25,
                1,  #  13,
            ), f"Expect 13 static dimensions Got: {all_static_x[0].shape}"
#     dynamic_ignore_vars=dynamic_ignore_vars,
#     logy=True,
#     test_years=test_years,
#     target_variable=target_var,
# )
# print("** Run the Engineer! **")

# MODELS
from src.models import EARecurrentNetwork

ealstm = EARecurrentNetwork(
    data_folder=data_dir,
    batch_size=1000,
    hidden_size=128,
    experiment='one_timestep_forecast',
    dynamic=True,
    seq_length=365,
    dynamic_ignore_vars=dynamic_ignore_vars,
    static_ignore_vars=static_ignore_vars,
    target_var='discharge_spec',
    test_years=np.arange(2011, 2017),
)
print("** Initialised Models! **")

# test the training functionality
ealstm.train(
    num_epochs=1,  # 100
    # early_stopping=10
)

# test the prediction functionality
# test_arrays_dict, preds_dict = ealstm.predict()
示例#14
0
# TODO: update `evaluate` / `predict` / `get_dataloader` / `train`
# TODO: do for EALSTM first and get some results asap rocky
data_dir = Path('/Volumes/Lees_Extend/data/ecmwf_sowc/data')
dynamic_ignore_vars = ['temperature', 'discharge_vol', 'discharge_spec',
                       'pet', 'humidity', 'shortwave_rad', 'longwave_rad', 'windspeed']

# import cProfile
# import profile

from src.models import EARecurrentNetwork
ealstm = EARecurrentNetwork(
    data_folder=data_dir,
    batch_size=1000,
    hidden_size=128,
    experiment='one_timestep_forecast',
    dynamic=True,
    seq_length=365,
    dynamic_ignore_vars=dynamic_ignore_vars,
    static_ignore_vars=static_ignore_vars,
    target_var='discharge_spec',
    test_years=np.arange(2011, 2017),
)
# test the training functionality
ealstm.train()

# test the prediction functionality
ealstm.predict()

#
captured = capsys.readouterr()
expected_stdout = "`include_yearly_aggs` does not yet work for dynamic dataloder. Setting to False"
assert (