Python LinearRegression.train示例，src.models.LinearRegression.train Python示例

示例#1

0

显示文件

文件： _base_models.py 项目： sumesh1/ml_drought

def regression(
    experiment="one_month_forecast",
    include_pred_month=True,
    surrounding_pixels=None,
    explain=False,
    static="features",
    ignore_vars=None,
    predict_delta=False,
    spatial_mask=None,
    include_latlons=False,
):
    predictor = LinearRegression(
        get_data_path(),
        experiment=experiment,
        include_pred_month=include_pred_month,
        surrounding_pixels=surrounding_pixels,
        static=static,
        ignore_vars=ignore_vars,
        predict_delta=predict_delta,
        spatial_mask=spatial_mask,
        include_latlons=include_latlons,
    )
    predictor.train()
    predictor.evaluate(save_preds=True)

    # mostly to test it works
    if explain:
        predictor.explain(save_shap_values=True)

示例#2

0

显示文件

文件： models.py 项目： tommylees112/ml_stratosphere

def regression(experiment='one_month_forecast',
               include_pred_month=True,
               surrounding_pixels=1):
    # if the working directory is alread ml_drought don't need ../data
    if Path('.').absolute().as_posix().split('/')[-1] == 'ml_drought':
        data_path = Path('data')
    else:
        data_path = Path('../data')

    predictor = LinearRegression(data_path,
                                 experiment=experiment,
                                 include_pred_month=include_pred_month,
                                 surrounding_pixels=surrounding_pixels)
    predictor.train()
    predictor.evaluate(save_preds=True)

    # mostly to test it works
    predictor.explain(save_shap_values=True)

示例#3

0

显示文件

    def test_save(self, tmp_path, monkeypatch):

        coef_array = np.array([1, 1, 1, 1, 1])
        intercept_array = np.array([2])

        def mocktrain(self):
            class MockModel:
                @property
                def coef_(self):
                    return coef_array

                @property
                def intercept_(self):
                    return intercept_array

            self.model = MockModel()

        monkeypatch.setattr(LinearRegression, "train", mocktrain)

        model = LinearRegression(
            tmp_path, experiment="one_month_forecast", normalize_y=False
        )
        model.train()
        model.save_model()

        assert (
            tmp_path / "models/one_month_forecast/linear_regression/model.pkl"
        ).exists(), f"Model not saved!"

        with (tmp_path / "models/one_month_forecast/linear_regression/model.pkl").open(
            "rb"
        ) as f:
            model_dict = pickle.load(f)
        assert np.array_equal(
            coef_array, model_dict["model"]["coef"]
        ), "Different coef array saved!"
        assert np.array_equal(
            intercept_array, model_dict["model"]["intercept"]
        ), "Different intercept array saved!"
        assert (
            model_dict["experiment"] == "one_month_forecast"
        ), "Different experiment saved!"

示例#4

0

显示文件

def regression(
    experiment="one_month_forecast",
    include_pred_month=True,
    surrounding_pixels=None,
    ignore_vars=None,
):
    data_path = get_data_path()
    spatial_mask = data_path / "interim/boundaries_preprocessed/kenya_asal_mask.nc"
    spatial_mask = None

    predictor = LinearRegression(
        data_path,
        experiment=experiment,
        include_pred_month=include_pred_month,
        surrounding_pixels=surrounding_pixels,
        ignore_vars=ignore_vars,
        static="embeddings",
        spatial_mask=spatial_mask,
    )
    predictor.train()
    predictor.evaluate(save_preds=True)

示例#5

0

显示文件

文件： models_experiment.py 项目： tommylees112/esowc_notes

def regression(
    experiment="one_month_forecast",
    include_pred_month=True,
    surrounding_pixels=None,
    ignore_vars=None,
    include_static=True,
):
    # if the working directory is alread ml_drought don't need ../data
    if Path(".").absolute().as_posix().split("/")[-1] == "ml_drought":
        data_path = Path("data")
    else:
        data_path = Path("../data")

    predictor = LinearRegression(
        data_path,
        experiment=experiment,
        include_pred_month=include_pred_month,
        surrounding_pixels=surrounding_pixels,
        ignore_vars=ignore_vars,
        include_static=include_static,
    )
    predictor.train(early_stopping=5)
    predictor.evaluate(save_preds=True)

示例#6

0

显示文件

文件： test_regression.py 项目： tommylees112/ml_stratosphere

    def test_save(self, tmp_path, monkeypatch):

        coef_array = np.array([1, 1, 1, 1, 1])
        intercept_array = np.array([2])

        def mocktrain(self):
            class MockModel:
                @property
                def coef_(self):
                    return coef_array

                @property
                def intercept_(self):
                    return intercept_array

            self.model = MockModel()

        monkeypatch.setattr(LinearRegression, 'train', mocktrain)

        model = LinearRegression(tmp_path, experiment='one_month_forecast')
        model.train()
        model.save_model()

        assert (tmp_path /
                'models/one_month_forecast/linear_regression/model.pkl'
                ).exists(), f'Model not saved!'

        with (tmp_path /
              'models/one_month_forecast/linear_regression/model.pkl'
              ).open('rb') as f:
            model_dict = pickle.load(f)
        assert np.array_equal(coef_array, model_dict['model']['coef']), \
            'Different coef array saved!'
        assert np.array_equal(intercept_array, model_dict['model']['intercept']), \
            'Different intercept array saved!'
        assert model_dict[
            'experiment'] == 'one_month_forecast', 'Different experiment saved!'

示例#7

0

显示文件

    def test_train(
        self, tmp_path, capsys, use_pred_months, experiment, monthly_agg, predict_delta
    ):
        x, _, _ = _make_dataset(size=(5, 5), const=True)
        x_static, _, _ = _make_dataset(size=(5, 5), add_times=False)
        y = x.isel(time=[-1])

        x_add1, _, _ = _make_dataset(size=(5, 5), const=True, variable_name="precip")
        x_add1 = x_add1 * 2
        x_add2, _, _ = _make_dataset(size=(5, 5), const=True, variable_name="temp")
        x_add2 = x_add2 * 3
        x = xr.merge([x, x_add1, x_add2])

        norm_dict = {
            "VHI": {"mean": 0, "std": 1},
            "precip": {"mean": 0, "std": 1},
            "temp": {"mean": 0, "std": 1},
        }

        static_norm_dict = {"VHI": {"mean": 0.0, "std": 1.0}}

        test_features = tmp_path / f"features/{experiment}/train/2001_12"
        test_features.mkdir(parents=True)
        pred_features = tmp_path / f"features/{experiment}/test/2001_12"
        pred_features.mkdir(parents=True)
        static_features = tmp_path / f"features/static"
        static_features.mkdir(parents=True)

        with (tmp_path / f"features/{experiment}/normalizing_dict.pkl").open("wb") as f:
            pickle.dump(norm_dict, f)

        with (tmp_path / f"features/static/normalizing_dict.pkl").open("wb") as f:
            pickle.dump(static_norm_dict, f)

        x.to_netcdf(test_features / "x.nc")
        x.to_netcdf(pred_features / "x.nc")
        y.to_netcdf(test_features / "y.nc")
        y.to_netcdf(pred_features / "y.nc")
        x_static.to_netcdf(static_features / "data.nc")

        model = LinearRegression(
            tmp_path,
            include_pred_month=use_pred_months,
            experiment=experiment,
            include_monthly_aggs=monthly_agg,
            predict_delta=predict_delta,
            normalize_y=True,
        )
        model.train()

        captured = capsys.readouterr()
        expected_stdout = "Epoch 1, train RMSE: "
        assert (
            expected_stdout in captured.out
        ), f"Expected stdout to be {expected_stdout}, got {captured.out}"

        assert (
            type(model.model) == linear_model.SGDRegressor
        ), f"Model attribute not a linear regression!"

        if experiment == "nowcast":
            coef_size = (3 * 35) + 2
        elif experiment == "one_month_forecast":
            coef_size = 3 * 36
        if monthly_agg:
            # doubled including the mean, tripled including the std
            coef_size *= 2
        if use_pred_months:
            coef_size += 12

        coef_size += 3  # for the yearly aggs
        coef_size += 1  # for the static variable
        coef_size += 1  # for the prev_y_var

        assert model.model.coef_.size == coef_size, f"Got unexpected coef size"

        test_arrays_dict, preds_dict = model.predict()
        assert (
            test_arrays_dict["2001_12"]["y"].size == preds_dict["2001_12"].shape[0]
        ), "Expected length of test arrays to be the same as the predictions"

        # test saving the model outputs
        model.evaluate(save_preds=True)

        save_path = model.data_path / "models" / experiment / "linear_regression"
        assert (save_path / "preds_2001_12.nc").exists()
        assert (save_path / "results.json").exists()

        pred_ds = xr.open_dataset(save_path / "preds_2001_12.nc")
        assert np.isin(["lat", "lon", "time"], [c for c in pred_ds.coords]).all()
        assert y.time == pred_ds.time

示例#8

0

显示文件

文件： test_shap_vals.py 项目： tommylees112/esowc_notes

# train models
from pathlib import Path
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns

from src.models import LinearRegression, LinearNetwork, Persistence
from src.models.data import DataLoader

data_path = Path("data")
l = LinearRegression(data_path)
l.train()

ln = LinearNetwork(layer_sizes=[100], data_folder=data_path)
ln.train(num_epochs=10)

# ------------------------------------------------------------------------------
# try and explain the LinearRegression model
# ------------------------------------------------------------------------------
test_arrays_loader = DataLoader(
    data_path=data_path, batch_file_size=1, shuffle_data=False, mode="test"
)
key, val = list(next(iter(test_arrays_loader)).items())[0]
explanations = l.explain(val.x)

# plot the SHAP explanations

# 1. mean spatial and temporal response
mean_expl = explanations.mean(axis=0).mean(axis=0)
x_vars = val.x_vars

示例#9

0

显示文件

from pathlib import Path
import numpy as np
import matplotlib.pyplot as plt
import pickle

from src.analysis import plot_shap_values
from src.models import Persistence, LinearRegression, LinearNetwork
from src.models.data import DataLoader

%load_ext autoreload
%autoreload 2

data_dir = Path('/Volumes/Lees_Extend/data/ecmwf_sowc/data')
predictor = LinearRegression(data_folder=data_dir, experiment='nowcast')
predictor.train()

示例#10

0

显示文件

文件： test_regression.py 项目： tommylees112/ml_stratosphere

    def test_train(self, tmp_path, capsys, use_pred_months, experiment,
                   monthly_agg):
        x, _, _ = _make_dataset(size=(5, 5), const=True)
        x_static, _, _ = _make_dataset(size=(5, 5), add_times=False)
        y = x.isel(time=[-1])

        x_add1, _, _ = _make_dataset(size=(5, 5),
                                     const=True,
                                     variable_name='precip')
        x_add2, _, _ = _make_dataset(size=(5, 5),
                                     const=True,
                                     variable_name='temp')
        x = xr.merge([x, x_add1, x_add2])

        norm_dict = {
            'VHI': {
                'mean': 0,
                'std': 1
            },
            'precip': {
                'mean': 0,
                'std': 1
            },
            'temp': {
                'mean': 0,
                'std': 1
            }
        }

        static_norm_dict = {'VHI': {'mean': 0.0, 'std': 1.0}}

        test_features = tmp_path / f'features/{experiment}/train/hello'
        test_features.mkdir(parents=True)
        pred_features = tmp_path / f'features/{experiment}/test/hello'
        pred_features.mkdir(parents=True)
        static_features = tmp_path / f'features/static'
        static_features.mkdir(parents=True)

        with (tmp_path /
              f'features/{experiment}/normalizing_dict.pkl').open('wb') as f:
            pickle.dump(norm_dict, f)

        with (tmp_path /
              f'features/static/normalizing_dict.pkl').open('wb') as f:
            pickle.dump(static_norm_dict, f)

        x.to_netcdf(test_features / 'x.nc')
        x.to_netcdf(pred_features / 'x.nc')
        y.to_netcdf(test_features / 'y.nc')
        y.to_netcdf(pred_features / 'y.nc')
        x_static.to_netcdf(static_features / 'data.nc')

        model = LinearRegression(tmp_path,
                                 include_pred_month=use_pred_months,
                                 experiment=experiment,
                                 include_monthly_aggs=monthly_agg)
        model.train()

        captured = capsys.readouterr()
        expected_stdout = 'Epoch 1, train RMSE: '
        assert expected_stdout in captured.out, \
            f'Expected stdout to be {expected_stdout}, got {captured.out}'

        assert type(model.model) == linear_model.SGDRegressor, \
            f'Model attribute not a linear regression!'

        if experiment == 'nowcast':
            coef_size = (3 * 35) + 2
        elif experiment == 'one_month_forecast':
            coef_size = (3 * 36)
        if monthly_agg:
            # doubled including the mean, tripled including the std
            coef_size *= 2
        if use_pred_months:
            coef_size += 12

        coef_size += 3  # for the yearly aggs
        coef_size += 1  # for the static variable

        assert model.model.coef_.size == coef_size, f'Got unexpected coef size'

        test_arrays_dict, preds_dict = model.predict()
        assert (
            test_arrays_dict['hello']['y'].size == preds_dict['hello'].shape[0]
        ), 'Expected length of test arrays to be the same as the predictions'

        # test saving the model outputs
        model.evaluate(save_preds=True)

        save_path = model.data_path / 'models' / experiment / 'linear_regression'
        assert (save_path / 'preds_hello.nc').exists()
        assert (save_path / 'results.json').exists()

        pred_ds = xr.open_dataset(save_path / 'preds_hello.nc')
        assert np.isin(['lat', 'lon', 'time'],
                       [c for c in pred_ds.coords]).all()
        assert y.time == pred_ds.time