def plot_ba(experiment, **kwargs):
    """Produce the `ba_plotting` figure for the test-set predictions of `experiment`.

    Only cached split data, masks and fitted models are used; the respective
    cache checks are carried out before each retrieval.

    Args:
        experiment: Experiment to plot. Its `name` determines both the figure
            sub-directory and the output filename.
        **kwargs: Accepted, but not used by this function.

    """
    # Split data must already be in the cache.
    get_experiment_split_data.check_in_store(experiment)
    train_X, test_X, train_y, test_y = get_experiment_split_data(experiment)

    # The masks must already be in the cache as well.
    get_endog_exog_mask.check_in_store(experiment)
    _, _, mask = get_endog_exog_mask(experiment)
    check_master_masks(mask)

    # Likewise, only a previously fitted (cached) model is acceptable.
    get_model(train_X, train_y, cache_check=True)

    test_predictions = threading_get_model_predict(
        X_train=train_X, y_train=train_y, predict_X=test_X
    )

    # Assemble the plotting inputs in the order in which they are consumed.
    plotting_data = get_ba_plotting_data(test_predictions, test_y, mask)
    saver = map_figure_saver(sub_directory=experiment.name)
    aux_kwargs = get_aux0_aux1_kwargs(test_y, mask)

    ba_plotting(
        *plotting_data,
        figure_saver=saver,
        **aux_kwargs,
        filename=f"{experiment.name}_ba_prediction",
    )
def plot_obs_pred_comp(experiment, **kwargs):
    """Plot the observed-minus-predicted difference cube for `experiment`.

    Only cached split data, masks and fitted models are used. The figure is
    drawn within the context manager returned by the experiment-specific
    `map_figure_saver`.

    Args:
        experiment: Experiment to plot. Its `name` determines both the figure
            sub-directory and the output filename.
        **kwargs: Accepted, but not used by this function.

    """
    # Cached split data and fitted model only.
    get_experiment_split_data.check_in_store(experiment)
    train_X, test_X, train_y, test_y = get_experiment_split_data(experiment)
    get_model(train_X, train_y, cache_check=True)

    # Cached masks only.
    get_endog_exog_mask.check_in_store(experiment)
    _, _, mask = get_endog_exog_mask(experiment)
    check_master_masks(mask)

    test_predictions = threading_get_model_predict(
        X_train=train_X, y_train=train_y, predict_X=test_X
    )
    diff_cube = get_obs_pred_diff_cube(test_y, test_predictions, mask)

    saver = map_figure_saver(sub_directory=experiment.name)
    with saver(f"{experiment.name}_obs_pred_comp", sub_directory="predictions"):
        # The figure is created only once the saving context has been entered.
        figure = plt.figure(figsize=(5.1, 2.3))
        colorbar_format = get_sci_format(ndigits=0)
        aux_kwargs = get_aux0_aux1_kwargs(test_y, mask)

        disc_cube_plot(
            diff_cube,
            fig=figure,
            cmap="BrBG",
            cmap_midpoint=0,
            cmap_symmetric=False,
            bin_edges=[-0.01, -0.001, -1e-4, 0, 0.001, 0.01, 0.02],
            extend="both",
            cbar_format=colorbar_format,
            cbar_pad=0.025,
            cbar_label="Ob. - Pr.",
            **aux_kwargs,
            loc=(0.83, 0.14),
            height=0.055,
            aspect=1,
            spacing=0.06 * 0.2,
        )
# Example #3
# 0
# Silence known, noisy warnings. Each pattern is a regular expression that is
# matched against the warning message.
warnings.filterwarnings("ignore", ".*Collapsing a non-contiguous coordinate.*")
warnings.filterwarnings("ignore", ".*DEFAULT_SPHERICAL_EARTH_RADIUS.*")
warnings.filterwarnings("ignore", ".*guessing contiguous bounds.*")

# NOTE(review): presumably emitted while computing SHAP values - confirm.
warnings.filterwarnings(
    "ignore", 'Setting feature_perturbation = "tree_path_dependent".*')

# Script entry point: loads the cached data, model and SHAP values for a single
# experiment and defines the analysis parameters.
# NOTE(review): the names defined below are not used within the visible part of
# this script - the remainder of the script is presumably not shown here.
if __name__ == "__main__":
    experiment = Experiment["15VEG_FAPAR"]

    # Operate on cached model / data only.
    get_endog_exog_mask.check_in_store(experiment)
    endog_data, _, master_mask = get_endog_exog_mask(experiment)

    check_master_masks(master_mask)

    get_experiment_split_data.check_in_store(experiment)
    X_train, X_test, y_train, y_test = get_experiment_split_data(experiment)

    # The first call carries out the cache check only, the second call
    # retrieves the fitted model itself.
    get_model(X_train, y_train, cache_check=True)
    rf = get_model(X_train, y_train)

    # As above: check for cached SHAP values before retrieving them.
    get_shap_values.check_in_store(rf=rf, X=X_test)
    shap_values = get_shap_values(rf=rf, X=X_test)

    # Analysis / plotting parameters.
    diff_threshold = 0.5
    ptp_threshold_factor = 0.12  # relative to the mean

    # Only consider lags up to and including 9.
    chosen_lags = tuple(lag for lag in variable.lags if lag <= 9)
# Example #4
# 0
def multi_model_ale_plot(*args, verbose=False, **kwargs):
    """Plot 1D ALE curves of several experiments' models for pairs of variables.

    For each pair of variables, (FAPAR, LAI) and (SIF, VOD), a figure with
    5 rows and 2 columns of axes is created. The first variable of the pair is
    plotted in the left column (including the legend) and the second variable
    in the right column using `multi_model_ale_1d`. Each figure is saved using
    `figure_saver.save_figure` in the "ale_comp" sub-directory.

    Only cached data and fitted models are used.

    Args:
        *args: Accepted, but not used by this function.
        verbose (bool): Passed on to `multi_model_ale_1d`.
        **kwargs: Accepted, but not used by this function.

    """
    # Experiments for which data will be plotted.
    experiments = [
        Experiment["ALL"],
        Experiment["TOP15"],
        Experiment["CURR"],
        Experiment["BEST15"],
        Experiment["15VEG_FAPAR"],
        Experiment["15VEG_LAI"],
        Experiment["15VEG_VOD"],
        Experiment["15VEG_SIF"],
        Experiment["CURRDD_FAPAR"],
        Experiment["CURRDD_LAI"],
        Experiment["CURRDD_VOD"],
        Experiment["CURRDD_SIF"],
    ]

    # Operate on cached data/models only.
    experiment_masks = []
    plotting_experiment_data = {}

    for experiment in tqdm(experiments, desc="Loading data"):
        get_data(experiment, cache_check=True)
        get_experiment_split_data.check_in_store(experiment)
        X_train, X_test, y_train, y_test = get_experiment_split_data(
            experiment)
        get_model(X_train, y_train, cache_check=True)

        experiment_masks.append(get_endog_exog_mask(experiment)[2])
        plotting_experiment_data[experiment] = dict(
            model=get_model(X_train, y_train),
            X_train=X_train,
        )

    # Ensure masks are aligned.
    check_master_masks(*experiment_masks)

    lags = (0, 1, 3, 6, 9)

    # Axis scaling factors. These do not depend on the plotted variables.
    x_factor_exp = 0
    x_factor = 10**x_factor_exp
    # The x-axis labels set below do not mention a scaling factor.
    assert x_factor_exp == 0

    y_factor_exp = -4
    y_factor = 10**y_factor_exp
    y_factor_str = rf"$10^{{{y_factor_exp}}}$"

    # Matches lag strings like '3M' within the x-axis labels. A raw string is
    # required here, since '\d' is an invalid escape sequence otherwise.
    lag_pattern = re.compile(r"(\dM)")

    # Margin (in x-axis data units) added at both ends of the x-axis.
    margin = 0.4

    for comp_vars in [[variable.FAPAR, variable.LAI],
                      [variable.SIF, variable.VOD]]:
        fig, axes = plt.subplots(5, 2, sharex="col", figsize=(7.0, 5.8))

        # Create general legend labels (with 'X' instead of FAPAR, or LAI, etc...).
        mod_exp_plot_kwargs = deepcopy(experiment_plot_kwargs)
        for plot_kwargs in mod_exp_plot_kwargs.values():
            if plot_kwargs["label"].startswith("15VEG_"):
                plot_kwargs["label"] = "15VEG_X"
            elif plot_kwargs["label"].startswith("CURRDD_"):
                plot_kwargs["label"] = "CURRDD_X"

        # Keyword arguments shared by the plots in both columns.
        shared_ale_kwargs = dict(
            verbose=verbose,
            fig=fig,
            lags=lags,
            x_ndigits=2,
            x_factor=x_factor,
            x_rotation=0,
            y_ndigits=0,
            y_factor=y_factor,
        )

        # Left column: first variable, including the (generalised) legend.
        multi_model_ale_1d(
            comp_vars[0],
            plotting_experiment_data,
            mod_exp_plot_kwargs,
            legend_bbox=(0.5, 1.01),
            axes=axes[:, 0:1],
            **shared_ale_kwargs,
        )
        # Right column: second variable. No legend is drawn here and the
        # unmodified plotting kwargs are passed.
        multi_model_ale_1d(
            comp_vars[1],
            plotting_experiment_data,
            experiment_plot_kwargs,
            legend=False,
            axes=axes[:, 1:2],
            **shared_ale_kwargs,
        )

        for ax in axes[:, 1]:
            ax.set_ylabel("")
        # The lag is extracted from the x-axis labels, which therefore need to
        # be read before they are cleared below.
        for ax in axes[:, 0]:
            lag_match = lag_pattern.search(ax.get_xlabel())
            lag_m = f" {lag_match.group(1)}" if lag_match else ""
            ax.set_ylabel(f"ALE{lag_m} ({y_factor_str} BA)")
        for ax in axes.flatten():
            ax.set_xlabel("")

        # Only the bottom row of axes is given x-axis labels.
        for ax, var in zip(axes[-1], comp_vars):
            ax.set_xlabel(
                f"{shorten_features(str(var))} ({variable.units[var]})")

        # Label the individual axes as '(a)', '(b)', ...
        for ax, title in zip(axes.flatten(), ascii_lowercase):
            ax.text(0.5, 1.05, f"({title})", transform=ax.transAxes)

        for ax in axes.ravel():
            ax.set_xlim(-margin, 20 + margin)

        fig.tight_layout(h_pad=0.4)
        fig.align_labels()

        figure_saver.save_figure(
            fig,
            f"{'__'.join(map(shorten_features, map(str, comp_vars)))}_ale_comp",
            sub_directory="ale_comp",
        )
def prediction_comparisons():
    """Compare ALL and CURR predictions.

    For both experiments, summary statistics of the observed and predicted
    test data are printed. Thereafter, the difference between the absolute
    prediction errors of the two experiments (second minus first) is averaged
    along the first axis, divided by the likewise averaged observations, and
    plotted using `disc_cube_plot`. The resulting figure is saved using
    `map_figure_saver.save_figure`.

    Only cached data and fitted models are used.

    """
    experiments = [Experiment.ALL, Experiment.CURR]

    # Operate on cached data/models only.
    experiment_masks = []
    for experiment in experiments:
        get_data(experiment, cache_check=True)
        get_experiment_split_data.check_in_store(experiment)
        X_train, _, y_train, _ = get_experiment_split_data(experiment)
        get_model(X_train, y_train, cache_check=True)

        experiment_masks.append(get_endog_exog_mask(experiment)[2])

    # Ensure masks are aligned.
    check_master_masks(*experiment_masks)

    master_mask = experiment_masks[0]

    # Record the prediction errors on the map grid.
    map_experiment_errors = {}

    for experiment in experiments:
        X_train, X_test, y_train, y_test = get_experiment_split_data(experiment)
        predicted_test = threading_get_model_predict(
            X_train=X_train,
            y_train=y_train,
            predict_X=X_test,
        )

        observed_test = y_test.values
        lowest_observed = np.min(observed_test)

        print("Experiment:", experiment.name)
        print("mean observed test:", np.mean(observed_test))
        print("mean predicted test:", np.mean(predicted_test))
        print("lowest observed test:", lowest_observed)
        print(
            "fraction of times this occurs:",
            np.sum(observed_test == lowest_observed) / observed_test.size,
        )
        print("lowest test prediction:", np.min(predicted_test))

        map_experiment_errors[experiment] = get_mm_data(
            observed_test - predicted_test, master_mask, kind="val"
        )

    error_mag_diff = np.abs(map_experiment_errors[experiments[1]]) - np.abs(
        map_experiment_errors[experiments[0]]
    )

    # The observations of the last experiment are used for the normalisation
    # and the auxiliary plotting kwargs. This is stated explicitly here instead
    # of relying on the variable left over from the loop above.
    y_test = get_experiment_split_data(experiments[-1])[3]

    rel_error_mag_diff = np.mean(error_mag_diff, axis=0) / np.mean(
        get_mm_data(y_test.values, master_mask, kind="val"), axis=0
    )
    all_rel = get_unmasked(rel_error_mag_diff)

    print(f"% >0: {100 * np.sum(all_rel > 0) / all_rel.size:0.1f}")
    print(f"% <0: {100 * np.sum(all_rel < 0) / all_rel.size:0.1f}")

    fig, _, cbar = disc_cube_plot(
        dummy_lat_lon_cube(rel_error_mag_diff),
        bin_edges=(-0.5, 0, 0.5),
        extend="both",
        cmap="PiYG",
        cmap_midpoint=0,
        cmap_symmetric=False,
        cbar_label=f"<|Err({experiments[1].name})| - |Err({experiments[0].name})|> / <Ob.>",
        cbar_shrink=0.3,
        cbar_aspect=15,
        cbar_extendfrac=0.1,
        cbar_pad=0.02,
        cbar_format=None,
        **get_aux0_aux1_kwargs(y_test, master_mask),
        loc=(0.79, 0.14),
        height=0.05,
        aspect=1.25,
        spacing=0.06 * 0.2,
    )
    cbar.ax.yaxis.label.set_size(7)

    map_figure_saver.save_figure(
        fig, f"rel_error_mag_diff_{'_'.join(map(attrgetter('name'), experiments))}"
    )