Пример #1
0
def regression(
    experiment="one_month_forecast",
    include_pred_month=True,
    surrounding_pixels=None,
    explain=False,
    static="features",
    ignore_vars=None,
    predict_delta=False,
    spatial_mask=None,
    include_latlons=False,
):
    """Train and evaluate a ``LinearRegression`` model on the project data.

    Every argument except ``explain`` is forwarded unchanged to the
    ``LinearRegression`` constructor; the data directory comes from
    ``get_data_path()``. Evaluation is run with ``save_preds=True``.

    Args:
        explain: When truthy, additionally call
            ``explain(save_shap_values=True)`` on the trained model
            (mostly a check that explaining works).
    """
    model_kwargs = dict(
        experiment=experiment,
        include_pred_month=include_pred_month,
        surrounding_pixels=surrounding_pixels,
        static=static,
        ignore_vars=ignore_vars,
        predict_delta=predict_delta,
        spatial_mask=spatial_mask,
        include_latlons=include_latlons,
    )
    model = LinearRegression(get_data_path(), **model_kwargs)
    model.train()
    model.evaluate(save_preds=True)

    if not explain:
        return

    # mostly to test it works
    model.explain(save_shap_values=True)
Пример #2
0
def regression(experiment='one_month_forecast',
               include_pred_month=True,
               surrounding_pixels=1):
    """Train, evaluate and explain a ``LinearRegression`` model.

    The data directory is chosen relative to the current working
    directory: ``data`` when the working directory is named
    ``ml_drought``, otherwise ``../data``. The three arguments are
    forwarded unchanged to the ``LinearRegression`` constructor.
    """
    # if the working directory is already ml_drought we don't need ../data
    cwd_name = Path('.').absolute().as_posix().rsplit('/', 1)[-1]
    data_path = Path('data') if cwd_name == 'ml_drought' else Path('../data')

    model_kwargs = dict(experiment=experiment,
                        include_pred_month=include_pred_month,
                        surrounding_pixels=surrounding_pixels)
    model = LinearRegression(data_path, **model_kwargs)
    model.train()
    model.evaluate(save_preds=True)

    # mostly to test it works
    model.explain(save_shap_values=True)
Пример #3
0
    def test_save(self, tmp_path, monkeypatch):
        """Check that ``save_model`` pickles the coefficients, intercept and experiment.

        ``LinearRegression.train`` is replaced by a stub that installs a
        mock model exposing fixed ``coef_`` and ``intercept_`` arrays, so
        no data is needed under ``tmp_path``.
        """
        expected_coef = np.array([1, 1, 1, 1, 1])
        expected_intercept = np.array([2])

        def fake_train(self):
            # stand-in model exposing only the two attributes compared below
            class MockModel:
                @property
                def coef_(self):
                    return expected_coef

                @property
                def intercept_(self):
                    return expected_intercept

            self.model = MockModel()

        monkeypatch.setattr(LinearRegression, "train", fake_train)

        regressor = LinearRegression(
            tmp_path, experiment="one_month_forecast", normalize_y=False
        )
        regressor.train()
        regressor.save_model()

        # the single location the pickle is expected at
        model_file = tmp_path / "models/one_month_forecast/linear_regression/model.pkl"
        assert model_file.exists(), "Model not saved!"

        with model_file.open("rb") as handle:
            saved = pickle.load(handle)

        saved_model = saved["model"]
        assert np.array_equal(
            expected_coef, saved_model["coef"]
        ), "Different coef array saved!"
        assert np.array_equal(
            expected_intercept, saved_model["intercept"]
        ), "Different intercept array saved!"
        assert (
            saved["experiment"] == "one_month_forecast"
        ), "Different experiment saved!"
Пример #4
0
def regression(
    experiment="one_month_forecast",
    include_pred_month=True,
    surrounding_pixels=None,
    ignore_vars=None,
):
    """Train and evaluate a ``LinearRegression`` model using static embeddings.

    The four arguments are forwarded unchanged to the ``LinearRegression``
    constructor, together with ``static="embeddings"`` and no spatial
    mask. The data directory comes from ``get_data_path()`` and
    evaluation is run with ``save_preds=True``.
    """
    data_path = get_data_path()

    # No spatial mask is applied. Earlier code first built
    #   data_path / "interim/boundaries_preprocessed/kenya_asal_mask.nc"
    # and then immediately overwrote it with None, so that path was never
    # used; the dead assignment has been removed.
    spatial_mask = None

    predictor = LinearRegression(
        data_path,
        experiment=experiment,
        include_pred_month=include_pred_month,
        surrounding_pixels=surrounding_pixels,
        ignore_vars=ignore_vars,
        static="embeddings",
        spatial_mask=spatial_mask,
    )
    predictor.train()
    predictor.evaluate(save_preds=True)
Пример #5
0
def regression(
    experiment="one_month_forecast",
    include_pred_month=True,
    surrounding_pixels=None,
    ignore_vars=None,
    include_static=True,
):
    """Train (with ``early_stopping=5``) and evaluate a ``LinearRegression`` model.

    The data directory is chosen relative to the current working
    directory: ``data`` when the working directory is named
    ``ml_drought``, otherwise ``../data``. All arguments are forwarded
    unchanged to the ``LinearRegression`` constructor.
    """
    # if the working directory is already ml_drought we don't need ../data
    cwd_name = Path(".").absolute().as_posix().rsplit("/", 1)[-1]
    data_path = Path("data") if cwd_name == "ml_drought" else Path("../data")

    model_kwargs = dict(
        experiment=experiment,
        include_pred_month=include_pred_month,
        surrounding_pixels=surrounding_pixels,
        ignore_vars=ignore_vars,
        include_static=include_static,
    )
    model = LinearRegression(data_path, **model_kwargs)
    model.train(early_stopping=5)
    model.evaluate(save_preds=True)
    def test_save(self, tmp_path, monkeypatch):
        """Check that ``save_model`` pickles the coefficients, intercept and experiment.

        ``LinearRegression.train`` is replaced by a stub that installs a
        mock model exposing fixed ``coef_`` and ``intercept_`` arrays, so
        no data is needed under ``tmp_path``.
        """
        expected_coef = np.array([1, 1, 1, 1, 1])
        expected_intercept = np.array([2])

        def fake_train(self):
            # stand-in model exposing only the two attributes compared below
            class MockModel:
                @property
                def coef_(self):
                    return expected_coef

                @property
                def intercept_(self):
                    return expected_intercept

            self.model = MockModel()

        monkeypatch.setattr(LinearRegression, 'train', fake_train)

        regressor = LinearRegression(tmp_path, experiment='one_month_forecast')
        regressor.train()
        regressor.save_model()

        # the single location the pickle is expected at
        model_file = (tmp_path /
                      'models/one_month_forecast/linear_regression/model.pkl')
        assert model_file.exists(), 'Model not saved!'

        with model_file.open('rb') as handle:
            saved = pickle.load(handle)

        saved_model = saved['model']
        assert np.array_equal(expected_coef, saved_model['coef']), \
            'Different coef array saved!'
        assert np.array_equal(expected_intercept, saved_model['intercept']), \
            'Different intercept array saved!'
        assert saved['experiment'] == 'one_month_forecast', \
            'Different experiment saved!'
Пример #7
0
    def test_train(
        self, tmp_path, capsys, use_pred_months, experiment, monthly_agg, predict_delta
    ):
        """Train ``LinearRegression`` on a small synthetic dataset and check its outputs.

        Writes x / y / static NetCDF files and the two normalizing dicts
        under ``tmp_path``, trains the model, then checks the training
        stdout, the model type, the coefficient count, the prediction
        length and the files written by ``evaluate(save_preds=True)``.
        """
        x, _, _ = _make_dataset(size=(5, 5), const=True)
        x_static, _, _ = _make_dataset(size=(5, 5), add_times=False)
        # the target is the final timestep of the first variable
        y = x.isel(time=[-1])

        # two extra variables, scaled so they differ from the first one
        x_add1, _, _ = _make_dataset(size=(5, 5), const=True, variable_name="precip")
        x_add1 = x_add1 * 2
        x_add2, _, _ = _make_dataset(size=(5, 5), const=True, variable_name="temp")
        x_add2 = x_add2 * 3
        x = xr.merge([x, x_add1, x_add2])

        # identity normalization (mean 0, std 1) for every variable
        norm_dict = {
            "VHI": {"mean": 0, "std": 1},
            "precip": {"mean": 0, "std": 1},
            "temp": {"mean": 0, "std": 1},
        }

        static_norm_dict = {"VHI": {"mean": 0.0, "std": 1.0}}

        test_features = tmp_path / f"features/{experiment}/train/2001_12"
        test_features.mkdir(parents=True)
        pred_features = tmp_path / f"features/{experiment}/test/2001_12"
        pred_features.mkdir(parents=True)
        static_features = tmp_path / "features/static"
        static_features.mkdir(parents=True)

        with (tmp_path / f"features/{experiment}/normalizing_dict.pkl").open("wb") as f:
            pickle.dump(norm_dict, f)

        with (tmp_path / "features/static/normalizing_dict.pkl").open("wb") as f:
            pickle.dump(static_norm_dict, f)

        # the same x / y pair serves as both the train and the test sample
        x.to_netcdf(test_features / "x.nc")
        x.to_netcdf(pred_features / "x.nc")
        y.to_netcdf(test_features / "y.nc")
        y.to_netcdf(pred_features / "y.nc")
        x_static.to_netcdf(static_features / "data.nc")

        model = LinearRegression(
            tmp_path,
            include_pred_month=use_pred_months,
            experiment=experiment,
            include_monthly_aggs=monthly_agg,
            predict_delta=predict_delta,
            normalize_y=True,
        )
        model.train()

        captured = capsys.readouterr()
        expected_stdout = "Epoch 1, train RMSE: "
        assert (
            expected_stdout in captured.out
        ), f"Expected stdout to be {expected_stdout}, got {captured.out}"

        # exact type check on purpose: subclasses are not accepted
        assert (
            type(model.model) is linear_model.SGDRegressor
        ), "Model attribute not a linear regression!"

        # NOTE(review): presumably 3 variables times the number of input
        # timesteps (plus 2 extra features for nowcast) - TODO confirm
        if experiment == "nowcast":
            coef_size = (3 * 35) + 2
        elif experiment == "one_month_forecast":
            coef_size = 3 * 36
        if monthly_agg:
            # the monthly mean doubles the count (the original note adds
            # that it would triple if the std were included as well)
            coef_size *= 2
        if use_pred_months:
            coef_size += 12

        coef_size += 3  # for the yearly aggs
        coef_size += 1  # for the static variable
        coef_size += 1  # for the prev_y_var

        assert model.model.coef_.size == coef_size, "Got unexpected coef size"

        test_arrays_dict, preds_dict = model.predict()
        assert (
            test_arrays_dict["2001_12"]["y"].size == preds_dict["2001_12"].shape[0]
        ), "Expected length of test arrays to be the same as the predictions"

        # test saving the model outputs
        model.evaluate(save_preds=True)

        save_path = model.data_path / "models" / experiment / "linear_regression"
        assert (save_path / "preds_2001_12.nc").exists()
        assert (save_path / "results.json").exists()

        # open inside a context manager so the file handle is released
        with xr.open_dataset(save_path / "preds_2001_12.nc") as pred_ds:
            assert np.isin(["lat", "lon", "time"], list(pred_ds.coords)).all()
            assert y.time == pred_ds.time
Пример #8
0
# train models
from pathlib import Path
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns

from src.models import LinearRegression, LinearNetwork, Persistence
from src.models.data import DataLoader

# Data directory, given relative to the current working directory.
data_path = Path("data")
# Fit the linear regression with its default settings.
l = LinearRegression(data_path)
l.train()

# Fit a linear network with layer_sizes=[100] for 10 epochs.
ln = LinearNetwork(layer_sizes=[100], data_folder=data_path)
ln.train(num_epochs=10)

# ------------------------------------------------------------------------------
# try and explain the LinearRegression model
# ------------------------------------------------------------------------------
# Loader in "test" mode, unshuffled, with batch_file_size=1.
test_arrays_loader = DataLoader(
    data_path=data_path, batch_file_size=1, shuffle_data=False, mode="test"
)
# Take the first (key, value) pair of the first item the loader yields.
key, val = list(next(iter(test_arrays_loader)).items())[0]
explanations = l.explain(val.x)

# plot the SHAP explanations

# 1. mean spatial and temporal response
# Averages over axis 0 twice, i.e. collapses the first two axes of the
# explanations array (presumably instances, then timesteps - TODO confirm).
mean_expl = explanations.mean(axis=0).mean(axis=0)
x_vars = val.x_vars
Пример #9
0
from pathlib import Path
import numpy as np
import matplotlib.pyplot as plt
import pickle

from src.analysis import plot_shap_values
from src.models import Persistence, LinearRegression, LinearNetwork
from src.models.data import DataLoader

%load_ext autoreload
%autoreload 2

# NOTE(review): machine-specific absolute path (looks like a mounted
# volume); adjust before running on another machine.
data_dir = Path('/Volumes/Lees_Extend/data/ecmwf_sowc/data')
# Fit a linear regression for the 'nowcast' experiment.
predictor = LinearRegression(data_folder=data_dir, experiment='nowcast')
predictor.train()
    def test_train(self, tmp_path, capsys, use_pred_months, experiment,
                   monthly_agg):
        """Train ``LinearRegression`` on a small synthetic dataset and check its outputs.

        Writes x / y / static NetCDF files and the two normalizing dicts
        under ``tmp_path``, trains the model, then checks the training
        stdout, the model type, the coefficient count, the prediction
        length and the files written by ``evaluate(save_preds=True)``.
        """
        x, _, _ = _make_dataset(size=(5, 5), const=True)
        x_static, _, _ = _make_dataset(size=(5, 5), add_times=False)
        # the target is the final timestep of the first variable
        y = x.isel(time=[-1])

        x_add1, _, _ = _make_dataset(size=(5, 5),
                                     const=True,
                                     variable_name='precip')
        x_add2, _, _ = _make_dataset(size=(5, 5),
                                     const=True,
                                     variable_name='temp')
        x = xr.merge([x, x_add1, x_add2])

        # identity normalization (mean 0, std 1) for every variable
        norm_dict = {
            'VHI': {
                'mean': 0,
                'std': 1
            },
            'precip': {
                'mean': 0,
                'std': 1
            },
            'temp': {
                'mean': 0,
                'std': 1
            }
        }

        static_norm_dict = {'VHI': {'mean': 0.0, 'std': 1.0}}

        test_features = tmp_path / f'features/{experiment}/train/hello'
        test_features.mkdir(parents=True)
        pred_features = tmp_path / f'features/{experiment}/test/hello'
        pred_features.mkdir(parents=True)
        static_features = tmp_path / 'features/static'
        static_features.mkdir(parents=True)

        with (tmp_path /
              f'features/{experiment}/normalizing_dict.pkl').open('wb') as f:
            pickle.dump(norm_dict, f)

        with (tmp_path /
              'features/static/normalizing_dict.pkl').open('wb') as f:
            pickle.dump(static_norm_dict, f)

        # the same x / y pair serves as both the train and the test sample
        x.to_netcdf(test_features / 'x.nc')
        x.to_netcdf(pred_features / 'x.nc')
        y.to_netcdf(test_features / 'y.nc')
        y.to_netcdf(pred_features / 'y.nc')
        x_static.to_netcdf(static_features / 'data.nc')

        model = LinearRegression(tmp_path,
                                 include_pred_month=use_pred_months,
                                 experiment=experiment,
                                 include_monthly_aggs=monthly_agg)
        model.train()

        captured = capsys.readouterr()
        expected_stdout = 'Epoch 1, train RMSE: '
        assert expected_stdout in captured.out, \
            f'Expected stdout to be {expected_stdout}, got {captured.out}'

        # exact type check on purpose: subclasses are not accepted
        assert type(model.model) is linear_model.SGDRegressor, \
            'Model attribute not a linear regression!'

        # NOTE(review): presumably 3 variables times the number of input
        # timesteps (plus 2 extra features for nowcast) - TODO confirm
        if experiment == 'nowcast':
            coef_size = (3 * 35) + 2
        elif experiment == 'one_month_forecast':
            coef_size = (3 * 36)
        if monthly_agg:
            # the monthly mean doubles the count (the original note adds
            # that it would triple if the std were included as well)
            coef_size *= 2
        if use_pred_months:
            coef_size += 12

        coef_size += 3  # for the yearly aggs
        coef_size += 1  # for the static variable

        assert model.model.coef_.size == coef_size, 'Got unexpected coef size'

        test_arrays_dict, preds_dict = model.predict()
        assert (
            test_arrays_dict['hello']['y'].size == preds_dict['hello'].shape[0]
        ), 'Expected length of test arrays to be the same as the predictions'

        # test saving the model outputs
        model.evaluate(save_preds=True)

        save_path = model.data_path / 'models' / experiment / 'linear_regression'
        assert (save_path / 'preds_hello.nc').exists()
        assert (save_path / 'results.json').exists()

        # open inside a context manager so the file handle is released
        with xr.open_dataset(save_path / 'preds_hello.nc') as pred_ds:
            assert np.isin(['lat', 'lon', 'time'],
                           list(pred_ds.coords)).all()
            assert y.time == pred_ds.time