def param_iter():
    """Yield one parameter pair per (feature factory, instrument mode) combination.

    Each yielded item is ``((exc_name, exclude_inst), variable_factory)``. The
    feature factories form the outer loop and the instrument-exclusion modes
    the inner loop. Both loops are wrapped in tqdm progress bars whose
    visibility depends on ``verbose``, which is read from the surrounding scope.
    """
    inst_modes = [("with_inst", False), ("no_inst", True)]

    feature_bar = tqdm(
        [variable.FAPAR, variable.DRY_DAY_PERIOD],
        desc="Feature",
        disable=verbose < 1,
    )
    for factory in feature_bar:
        # A fresh inner bar is created for every feature factory.
        inst_bar = tqdm(inst_modes, desc="Exclude inst.", disable=verbose < 2)
        for label, excluded in inst_bar:
            yield (label, excluded), factory
def plot_clim_mon_ale_comp(*args, verbose=False, **kwargs):
    """Draw a 2x2 grid of single-feature 1D ALE plots and save the figure.

    The left column uses the "15VEG_FAPAR" experiment and the right column
    "15VEG_FAPAR_MON"; the top row plots ``variable.FAPAR[0]`` and the bottom
    row ``variable.DRY_DAY_PERIOD[3]``.

    Args:
        *args: Unused.
        verbose: Enables the progress bar and is forwarded to
            `plot_single_1d_ale`.
        **kwargs: Unused.
    """
    fig, axes = plt.subplots(2, 2, figsize=(5.8, 4.1))

    # One entry per panel: (axis, experiment, feature column, panel title).
    panels = [
        (
            axes[0, 0],
            Experiment["15VEG_FAPAR"],
            variable.FAPAR[0],
            "(a) 15VEG_FAPAR",
        ),
        (
            axes[0, 1],
            Experiment["15VEG_FAPAR_MON"],
            variable.FAPAR[0],
            "(b) 15VEG_FAPAR_MON",
        ),
        (
            axes[1, 0],
            Experiment["15VEG_FAPAR"],
            variable.DRY_DAY_PERIOD[3],
            "(c) 15VEG_FAPAR",
        ),
        (
            axes[1, 1],
            Experiment["15VEG_FAPAR_MON"],
            variable.DRY_DAY_PERIOD[3],
            "(d) 15VEG_FAPAR_MON",
        ),
    ]

    progress = tqdm(panels, desc="ALE plots", disable=not verbose)
    for panel_ax, panel_experiment, feature, panel_title in progress:
        plot_single_1d_ale(panel_experiment, feature, ax=panel_ax, verbose=verbose)
        panel_ax.set_title(panel_title)
        # Explicit collection after every panel.
        gc.collect()

    # Clear the y-axis labels of the right-hand column.
    for right_ax in axes[:, 1]:
        right_ax.set_ylabel("")

    fig.tight_layout()
    fig.align_labels()

    figure_saver.save_figure(fig, "15VEG_FAPAR_15VEG_FAPAR_MON_ALE_comp")
Пример #3
0
def multi_model_ale_plot(*args, verbose=False, **kwargs):
    """Plot 1D ALE comparisons of several experiments' models and save them.

    Data and models are loaded for a fixed list of experiments, and their
    masks are checked with `check_master_masks`. One figure with 5 rows and 2
    columns of axes is then produced for each variable pair, (FAPAR, LAI) and
    (SIF, VOD). Each axes column is handed to `multi_model_ale_1d` together
    with the five lags, and each figure is saved in the "ale_comp"
    sub-directory.

    Args:
        *args: Unused.
        verbose: Forwarded to `multi_model_ale_1d`.
        **kwargs: Unused.
    """
    # Experiments for which data will be plotted.
    experiments = [
        Experiment["ALL"],
        Experiment["TOP15"],
        Experiment["CURR"],
        Experiment["BEST15"],
        Experiment["15VEG_FAPAR"],
        Experiment["15VEG_LAI"],
        Experiment["15VEG_VOD"],
        Experiment["15VEG_SIF"],
        Experiment["CURRDD_FAPAR"],
        Experiment["CURRDD_LAI"],
        Experiment["CURRDD_VOD"],
        Experiment["CURRDD_SIF"],
    ]

    # Operate on cached data/models only.
    experiment_masks = []
    plotting_experiment_data = {}

    for experiment in tqdm(experiments, desc="Loading data"):
        # The `cache_check` / `check_in_store` calls come before the real
        # loads - presumably they fail fast if nothing is cached (confirm
        # against the caching helpers).
        get_data(experiment, cache_check=True)
        get_experiment_split_data.check_in_store(experiment)
        X_train, X_test, y_train, y_test = get_experiment_split_data(
            experiment)
        get_model(X_train, y_train, cache_check=True)

        experiment_masks.append(get_endog_exog_mask(experiment)[2])
        plotting_experiment_data[experiment] = dict(
            model=get_model(X_train, y_train),
            X_train=X_train,
        )

    # Ensure masks are aligned.
    check_master_masks(*experiment_masks)

    lags = (0, 1, 3, 6, 9)

    for comp_vars in [[variable.FAPAR, variable.LAI],
                      [variable.SIF, variable.VOD]]:
        fig, axes = plt.subplots(5, 2, sharex="col", figsize=(7.0, 5.8))

        # Create general legend labels (with 'X' instead of FAPAR, or LAI, etc...).
        mod_exp_plot_kwargs = deepcopy(experiment_plot_kwargs)
        for plot_kwargs in mod_exp_plot_kwargs.values():
            if plot_kwargs["label"].startswith("15VEG_"):
                plot_kwargs["label"] = "15VEG_X"
            elif plot_kwargs["label"].startswith("CURRDD_"):
                plot_kwargs["label"] = "CURRDD_X"

        x_factor_exp = 0
        x_factor = 10**x_factor_exp

        y_factor_exp = -4
        y_factor = 10**y_factor_exp
        y_factor_str = rf"$10^{{{y_factor_exp}}}$"

        # Left column: first variable, drawn with the generalised labels and
        # the shared legend.
        multi_model_ale_1d(
            comp_vars[0],
            plotting_experiment_data,
            mod_exp_plot_kwargs,
            verbose=verbose,
            legend_bbox=(0.5, 1.01),
            fig=fig,
            axes=axes[:, 0:1],
            lags=lags,
            x_ndigits=2,
            x_factor=x_factor,
            x_rotation=0,
            y_ndigits=0,
            y_factor=y_factor,
        )
        # Right column: second variable, no legend (the original plot kwargs
        # are passed here, as in the original code).
        multi_model_ale_1d(
            comp_vars[1],
            plotting_experiment_data,
            experiment_plot_kwargs,
            verbose=verbose,
            legend=False,
            fig=fig,
            axes=axes[:, 1:2],
            lags=lags,
            x_ndigits=2,
            x_factor=x_factor,
            x_rotation=0,
            y_ndigits=0,
            y_factor=y_factor,
        )

        for ax in axes[:, 1]:
            ax.set_ylabel("")
        for ax in axes[:, 0]:
            # Extract a lag such as "3M" from the x-label, if present, before
            # the x-labels are cleared below. A raw string keeps `\d` a regex
            # escape rather than an invalid string escape.
            lag_match = re.search(r"(\dM)", ax.get_xlabel())
            lag_m = f" {lag_match.group(1)}" if lag_match else ""
            ax.set_ylabel(f"ALE{lag_m} ({y_factor_str} BA)")
        for ax in axes.flatten():
            ax.set_xlabel("")

        # Only the bottom row carries x-labels (variable name and units).
        for ax, var in zip(axes[-1], comp_vars):
            # The label below contains no scaling factor, so none may be set.
            assert x_factor_exp == 0
            ax.set_xlabel(
                f"{shorten_features(str(var))} ({variable.units[var]})")

        # Letter each panel: (a), (b), ...
        for ax, title in zip(axes.flatten(), ascii_lowercase):
            ax.text(0.5, 1.05, f"({title})", transform=ax.transAxes)

        margin = 0.4

        for ax in axes.ravel():
            ax.set_xlim(-margin, 20 + margin)

        fig.tight_layout(h_pad=0.4)
        fig.align_labels()

        figure_saver.save_figure(
            fig,
            f"{'__'.join(map(shorten_features, map(str, comp_vars)))}_ale_comp",
            sub_directory="ale_comp",
        )
Пример #4
0
def plot_2d_ale(experiment, single=False, nargs=None, verbose=False, **kwargs):
    """Save 2D ALE plots for pairs of training columns of an experiment.

    Every 2-combination of `X_train.columns` is plotted twice, first with
    `plot_samples=True` and then with `plot_samples=False`, unless the number
    of plots is limited via `single` or `nargs`.

    Args:
        experiment: Experiment whose cached split data and model are used.
        single: If truthy, only the first plot is produced.
        nargs: If truthy (and `single` is not), the number of plots to produce.
        verbose: Enables the progress bar.
        **kwargs: Unused.
    """
    exp_figure_saver = figure_saver(sub_directory=experiment.name)

    # Operate on cached data only.
    get_experiment_split_data.check_in_store(experiment)
    X_train, X_test, y_train, y_test = get_experiment_split_data(experiment)

    # Operate on cached fitted models only.
    get_model(X_train, y_train, cache_check=True)
    model = get_model(X_train, y_train)

    def pair_sort_key(pair):
        """Return the integer sort key of a column pair."""
        # The two combinations used in the paper are forced to the front.
        if variable.FAPAR[0] in pair:
            if variable.FAPAR[1] in pair:
                return -1000
            if variable.DRY_DAY_PERIOD[0] in pair:
                return -999
        # Otherwise concatenate rank and shift of both columns into one number.
        digits = "".join(
            str(part) for col in pair for part in (col.rank, col.shift)
        )
        return int(digits)

    # Deterministic ordering of all column pairs.
    column_pairs = sorted(combinations(X_train.columns, 2), key=pair_sort_key)

    # Each pair is plotted with and then without samples.
    jobs = (
        (pair, with_samples)
        for pair in column_pairs
        for with_samples in [True, False]
    )

    total = 1 if single else (nargs or 2 * len(column_pairs))

    progress = tqdm(
        islice(jobs, total),
        desc=f"2D ALE plotting ({experiment})",
        total=total,
        disable=not verbose,
    )
    for columns, plot_samples in progress:
        x_parent = columns[0].parent
        y_parent = columns[1].parent
        save_ale_2d(
            experiment=experiment,
            model=model,
            train_set=X_train,
            features=columns,
            n_jobs=get_ncpus(),
            include_first_order=True,
            plot_samples=plot_samples,
            figure_saver=exp_figure_saver,
            ale_factor_exp=plotting_configuration.ale_factor_exps.get(
                (x_parent, y_parent), -2
            ),
            x_factor_exp=plotting_configuration.factor_exps.get(x_parent, 0),
            x_ndigits=plotting_configuration.ndigits.get(x_parent, 2),
            y_factor_exp=plotting_configuration.factor_exps.get(y_parent, 0),
            y_ndigits=plotting_configuration.ndigits.get(y_parent, 2),
        )
        # Close all figures after each plot.
        plt.close("all")
Пример #5
0
 def param_iter():
     """Yield ``(exclude_inst, variable_factory)`` for every combination.

     Feature factories form the outer loop and the exclusion flag the inner
     loop; both loops are wrapped in tqdm progress bars.
     """
     factories = [variable.FAPAR, variable.DRY_DAY_PERIOD]
     for factory in tqdm(factories, desc="Feature"):
         # A fresh inner bar is created for every feature factory.
         flag_bar = tqdm([False, True], desc="Exclude inst.")
         for flag in flag_bar:
             yield flag, factory
Пример #6
0
def plot_multi_ale(experiment, verbose=False, **kwargs):
    """Plot multi-lag 1D ALE curves for two features side by side and save them.

    The left panel shows the single vegetation feature found in the training
    columns and the right panel shows DRY_DAY_PERIOD, each for the first five
    entries of `variable.lags`. The DRY_DAY_PERIOD data is drawn a second time
    on an inset axis with fixed limits, and a shared legend is placed between
    the two panels.

    Args:
        experiment: Experiment whose cached split data and model are used.
        verbose: Forwarded to `multi_ale_1d`.
        **kwargs: Unused.

    Raises:
        ValueError: If none, or more than one, of the expected vegetation
            features is present in `X_train.columns`.
    """
    exp_figure_saver = figure_saver(sub_directory=experiment.name)

    # Operate on cached data only.
    get_experiment_split_data.check_in_store(experiment)
    X_train, X_test, y_train, y_test = get_experiment_split_data(experiment)

    # Operate on cached fitted models only.
    get_model(X_train, y_train, cache_check=True)
    model = get_model(X_train, y_train)

    fig, axes = plt.subplots(1, 2, figsize=(7.05, 2.8))

    # First element of each vegetation feature-category entry.
    expected_veg = tuple(
        map(
            itemgetter(0),
            variable.feature_categories[variable.Category.VEGETATION],
        )
    )

    matched = [f for f in expected_veg if f in X_train.columns]

    # Exactly one vegetation feature must be present.
    if not matched:
        raise ValueError(f"Could not find one of {expected_veg} in {X_train.columns}.")
    elif len(matched) > 1:
        raise ValueError(
            f"Found more than one of {tuple(map(str, expected_veg))} in "
            f"{X_train.columns}: {matched}"
        )
    features = (matched[0].parent, variable.DRY_DAY_PERIOD)

    ale_factor_exp = -3
    x_factor_exp = 0

    for feature_factory, ax, title in zip(
        tqdm(features, desc="Processing features"),
        axes,
        ("(a)", "(b)"),
    ):
        multi_ale_1d(
            model=model,
            X_train=X_train,
            features=[feature_factory[lag] for lag in variable.lags[:5]],
            train_response=y_train,
            fig=fig,
            ax=ax,
            verbose=verbose,
            monte_carlo_rep=100,
            # The ratio is always derived from the "15VEG_FAPAR" experiment,
            # not from `experiment`.
            monte_carlo_ratio=get_frac_train_nr_samples(Experiment["15VEG_FAPAR"], 0.1),
            legend=False,
            ale_factor_exp=ale_factor_exp,
            x_factor_exp=x_factor_exp,
            x_ndigits=plotting_configuration.ndigits.get(feature_factory, 2),
            x_skip=4,
            x_rotation=0,
        )
        ax.set_title(title)
        ax.set_xlabel(
            f"{shorten_features(str(feature_factory))} ({variable.units[feature_factory]})"
            if x_factor_exp == 0
            else (
                f"{feature_factory} ($10^{{{x_factor_exp}}}$ "
                f"{variable.units[feature_factory]})"
            ),
        )

    axes[1].set_ylabel("")

    # Inset axis to pronounce low-DD features.

    ax2 = inset_axes(
        axes[1],
        width=2.155,
        height=1.55,
        loc="lower left",
        bbox_to_anchor=(0.019, 0.225),
        # Anchor relative to the right-hand panel explicitly instead of
        # relying on the loop variable left over from the loop above.
        bbox_transform=axes[1].transAxes,
    )
    # Plot the DD data again on the inset axis.
    multi_ale_1d(
        model=model,
        X_train=X_train,
        features=[features[1][lag] for lag in variable.lags[:5]],
        train_response=y_train,
        fig=fig,
        ax=ax2,
        verbose=verbose,
        monte_carlo_rep=100,
        monte_carlo_ratio=get_frac_train_nr_samples(Experiment["15VEG_FAPAR"], 0.1),
        legend=False,
        ale_factor_exp=ale_factor_exp,
    )

    ax2.set_xlim(0, 17.5)
    ax2.set_ylim(-1.5e-3, 2e-3)

    # Strip ticks and labels from the inset, keeping only the grid.
    ax2.xaxis.set_major_formatter(ticker.ScalarFormatter())
    ax2.yaxis.set_major_formatter(ticker.ScalarFormatter())
    ax2.tick_params(axis="both", which="both", length=0)
    plt.setp(ax2.get_xticklabels(), visible=False)
    plt.setp(ax2.get_yticklabels(), visible=False)

    ax2.set_ylabel("")
    ax2.set_xlabel("")
    ax2.grid(True)

    mark_inset(axes[1], ax2, loc1=4, loc2=2, fc="none", ec="0.3")

    # Move the first (left) axis to the right.
    orig_bbox = axes[0].get_position()
    axes[0].set_position(
        [orig_bbox.xmin + 0.021, orig_bbox.ymin, orig_bbox.width, orig_bbox.height]
    )

    # Explicitly set the x-axis labels' positions so they line up horizontally.
    # The lowest axes bottom edge (capped at 1) is the common reference.
    y_min = min(1, *(ax.get_position().ymin for ax in axes))
    for ax in axes:
        bbox = ax.get_position()
        mean_x = (bbox.xmin + bbox.xmax) / 2.0
        # NOTE - Decrease the negative offset to move the label upwards.
        ax.xaxis.set_label_coords(mean_x, y_min - 0.1, transform=fig.transFigure)

    # Plot the legend in between the two axes.
    axes[1].legend(
        loc="center",
        ncol=5,
        bbox_to_anchor=(
            np.mean(
                [
                    axes[0].get_position().xmax,
                    axes[1].get_position().xmin,
                ]
            ),
            0.932,
        ),
        bbox_transform=fig.transFigure,
        handletextpad=0.25,
        columnspacing=0.5,
    )

    exp_figure_saver.save_figure(
        fig,
        f'{experiment.name}_{"__".join(map(shorten_features, map(str, features)))}_ale_shifts',
        sub_directory="multi_ale",
        transparent=False,
    )
    args = [[], [], []]
    experiments = list(Experiment)

    cmd_args = get_parsers()["parser"].parse_args()

    if cmd_args.experiment is not None:
        chosen_experiments = expand_experiment_strs(cmd_args.experiment)
    else:
        chosen_experiments = experiments.copy()

    chosen_experiments = chosen_experiments[:1 if cmd_args.single else None]

    run_experiments = []
    for experiment in tqdm(
            chosen_experiments,
            desc="Determining run-experiments",
            disable=not cmd_args.verbose,
    ):
        try:
            # Check if a full cache is already present.
            # Operate on cached data only.
            get_experiment_split_data.check_in_store(experiment)
            X_train, X_test, y_train, y_test = get_experiment_split_data(
                experiment)

            # Operate on cached fitted models only.
            get_model(X_train, y_train, cache_check=True)
            rf = get_model(X_train, y_train)

            get_shap_values.check_in_store(rf, X_train)
            get_shap_values.check_in_store(rf, X_test)
        stacked_shaps = np.vstack(
            [data.data[np.newaxis] for data in selected_data])

        # Calculate the significance of the global maxima for each of the valid pixels.

        # Valid indices are recorded in 'shared_mask'.

        valid_i, valid_j = np.where(~shared_mask)
        total_valid = len(valid_i)

        peak_indices = []

        for i, j in zip(
                tqdm(valid_i,
                     desc="Evaluating maxima",
                     smoothing=0,
                     disable=verbose < 3),
                valid_j,
        ):
            ptp_threshold = ptp_threshold_factor * mean_ba[i, j]
            peaks_i = significant_peak(
                stacked_shaps[:, i, j],
                diff_threshold=diff_threshold,
                ptp_threshold=ptp_threshold,
                strict=False,
            )

            # Adding information about the sign of the mean influence, sorted by time.
            peak_indices.append(
                tuple(
                    f"{filtered_lags[p_i]}({'+' if stacked_shaps[p_i, i, j] > 0 else '-'})"
    cmd_args = get_parsers()["parser"].parse_args()

    if cmd_args.experiment is not None:
        chosen_experiments = [
            exp
            for exp in experiments
            if exp in tuple(Experiment[exp] for exp in cmd_args.experiment)
        ]
    else:
        chosen_experiments = experiments.copy()

    chosen_experiments = chosen_experiments[: 1 if cmd_args.single else None]

    for experiment in tqdm(
        chosen_experiments,
        desc="Preparing ALE 1D arguments",
        disable=not cmd_args.verbose,
    ):
        # Operate on cached data / models only.
        get_experiment_split_data.check_in_store(experiment)
        X_train, X_test, y_train, y_test = get_experiment_split_data(experiment)

        get_model(X_train, y_train, cache_check=True)

        for column in X_train.columns:
            args[0].append(experiment)
            args[1].extend(column)

    run(plot_1d_ale, *args, cx1_kwargs=cx1_kwargs)