def param_iter():
    """Yield ``((exc_name, exclude_inst), variable_factory)`` combinations.

    The outer loop runs over the FAPAR and DRY_DAY_PERIOD variable factories,
    the inner loop over the two instantaneous-exclusion options. Both loops
    are wrapped in `tqdm` progress bars whose visibility depends on the
    `verbose` value from the enclosing scope (outer bar shown for
    ``verbose >= 1``, inner bar for ``verbose >= 2``).
    """
    factories = [variable.FAPAR, variable.DRY_DAY_PERIOD]
    inst_options = [("with_inst", False), ("no_inst", True)]

    feature_bar = tqdm(factories, desc="Feature", disable=verbose < 1)
    for variable_factory in feature_bar:
        # A fresh inner progress bar is created for every feature.
        inst_bar = tqdm(inst_options, desc="Exclude inst.", disable=verbose < 2)
        for inst_option in inst_bar:
            exc_name, exclude_inst = inst_option
            yield (exc_name, exclude_inst), variable_factory
def plot_clim_mon_ale_comp(*args, verbose=False, **kwargs):
    """Plot a 2x2 comparison of 1D ALE plots for two experiments.

    The top row shows `variable.FAPAR[0]` and the bottom row
    `variable.DRY_DAY_PERIOD[3]`; the left column uses the "15VEG_FAPAR"
    experiment and the right column "15VEG_FAPAR_MON". The figure is saved
    using the module-level `figure_saver`.

    Args:
        *args: Ignored.
        verbose: Enables the progress bar and is passed on to
            `plot_single_1d_ale`.
        **kwargs: Ignored.
    """
    fig, axes = plt.subplots(2, 2, figsize=(5.8, 4.1))

    # One entry per panel: (axis, experiment, column, title).
    panels = [
        (
            axes[0, 0],
            Experiment["15VEG_FAPAR"],
            variable.FAPAR[0],
            "(a) 15VEG_FAPAR",
        ),
        (
            axes[0, 1],
            Experiment["15VEG_FAPAR_MON"],
            variable.FAPAR[0],
            "(b) 15VEG_FAPAR_MON",
        ),
        (
            axes[1, 0],
            Experiment["15VEG_FAPAR"],
            variable.DRY_DAY_PERIOD[3],
            "(c) 15VEG_FAPAR",
        ),
        (
            axes[1, 1],
            Experiment["15VEG_FAPAR_MON"],
            variable.DRY_DAY_PERIOD[3],
            "(d) 15VEG_FAPAR_MON",
        ),
    ]

    progress = tqdm(panels, desc="ALE plots", disable=not verbose)
    for panel_ax, experiment, column, title in progress:
        plot_single_1d_ale(experiment, column, ax=panel_ax, verbose=verbose)
        panel_ax.set_title(title)
        # Run garbage collection after each panel has been drawn.
        gc.collect()

    # The right-hand column carries no y-axis label.
    for right_ax in (axes[0, 1], axes[1, 1]):
        right_ax.set_ylabel("")

    fig.tight_layout()
    fig.align_labels()
    figure_saver.save_figure(fig, "15VEG_FAPAR_15VEG_FAPAR_MON_ALE_comp")
def multi_model_ale_plot(*args, verbose=False, **kwargs):
    """Plot 1D ALE comparisons across the models of several experiments.

    For each pair of variables, (FAPAR, LAI) and (SIF, VOD), a figure with a
    5x2 grid of axes is created. The left column is filled by
    `multi_model_ale_1d` for the first variable and the right column for the
    second one, using the 5 lags in `lags`. Each figure is saved with the
    module-level `figure_saver` in the "ale_comp" sub-directory.

    Data and models are requested with `cache_check=True` / `check_in_store`
    before they are retrieved (presumably so that only cached results are
    used; confirm against those helpers).

    Args:
        *args: Ignored.
        verbose: Passed on to `multi_model_ale_1d`.
        **kwargs: Ignored.
    """
    # Function-scope import so that the block does not rely on a file-level
    # `import re` that is not visible here.
    import re

    # Experiments for which data will be plotted.
    experiments = [
        Experiment["ALL"],
        Experiment["TOP15"],
        Experiment["CURR"],
        Experiment["BEST15"],
        Experiment["15VEG_FAPAR"],
        Experiment["15VEG_LAI"],
        Experiment["15VEG_VOD"],
        Experiment["15VEG_SIF"],
        Experiment["CURRDD_FAPAR"],
        Experiment["CURRDD_LAI"],
        Experiment["CURRDD_VOD"],
        Experiment["CURRDD_SIF"],
    ]

    # Operate on cached data/models only.
    experiment_masks = []
    plotting_experiment_data = {}
    for experiment in tqdm(experiments, desc="Loading data"):
        get_data(experiment, cache_check=True)
        get_experiment_split_data.check_in_store(experiment)
        X_train, X_test, y_train, y_test = get_experiment_split_data(experiment)
        get_model(X_train, y_train, cache_check=True)

        experiment_masks.append(get_endog_exog_mask(experiment)[2])
        plotting_experiment_data[experiment] = dict(
            model=get_model(X_train, y_train),
            X_train=X_train,
        )

    # Ensure masks are aligned.
    check_master_masks(*experiment_masks)

    lags = (0, 1, 3, 6, 9)

    for comp_vars in [[variable.FAPAR, variable.LAI], [variable.SIF, variable.VOD]]:
        fig, axes = plt.subplots(5, 2, sharex="col", figsize=(7.0, 5.8))

        # Create general legend labels (with 'X' instead of FAPAR, or LAI, etc...).
        mod_exp_plot_kwargs = deepcopy(experiment_plot_kwargs)
        for plot_kwargs in mod_exp_plot_kwargs.values():
            if plot_kwargs["label"].startswith("15VEG_"):
                plot_kwargs["label"] = "15VEG_X"
            elif plot_kwargs["label"].startswith("CURRDD_"):
                plot_kwargs["label"] = "CURRDD_X"

        x_factor_exp = 0
        x_factor = 10**x_factor_exp

        y_factor_exp = -4
        y_factor = 10**y_factor_exp
        y_factor_str = rf"$10^{{{y_factor_exp}}}$"

        # Left column: first variable, with the shared (generalised) legend.
        multi_model_ale_1d(
            comp_vars[0],
            plotting_experiment_data,
            mod_exp_plot_kwargs,
            verbose=verbose,
            legend_bbox=(0.5, 1.01),
            fig=fig,
            axes=axes[:, 0:1],
            lags=lags,
            x_ndigits=2,
            x_factor=x_factor,
            x_rotation=0,
            y_ndigits=0,
            y_factor=y_factor,
        )
        # Right column: second variable, without a legend.
        multi_model_ale_1d(
            comp_vars[1],
            plotting_experiment_data,
            experiment_plot_kwargs,
            verbose=verbose,
            legend=False,
            fig=fig,
            axes=axes[:, 1:2],
            lags=lags,
            x_ndigits=2,
            x_factor=x_factor,
            x_rotation=0,
            y_ndigits=0,
            y_factor=y_factor,
        )

        for ax in axes[:, 1]:
            ax.set_ylabel("")

        for ax in axes[:, 0]:
            # Carry a lag suffix such as '1M' from the x-label over to the
            # y-label, since the x-labels are cleared below.
            # NOTE(review): this call and the `group(1)` access below were
            # reconstructed from a damaged source line (assumed `re.search`
            # and the single capture group) - confirm against file history.
            lag_match = re.search(r"(\dM)", ax.get_xlabel())
            if lag_match:
                lag_m = f" {lag_match.group(1)}"
            else:
                lag_m = ""
            ax.set_ylabel(f"ALE{lag_m} ({y_factor_str} BA)")

        for ax in axes.flatten():
            ax.set_xlabel("")

        # Only the bottom row keeps an x-label.
        for ax, var in zip(axes[-1], comp_vars):
            # The label below does not include an x-factor, so it must be 1.
            assert x_factor_exp == 0
            ax.set_xlabel(f"{shorten_features(str(var))} ({variable.units[var]})")

        # Panel letters: (a), (b), ...
        for ax, title in zip(axes.flatten(), ascii_lowercase):
            ax.text(0.5, 1.05, f"({title})", transform=ax.transAxes)

        margin = 0.4
        for ax in axes.ravel():
            ax.set_xlim(-margin, 20 + margin)

        fig.tight_layout(h_pad=0.4)
        fig.align_labels()

        figure_saver.save_figure(
            fig,
            f"{'__'.join(map(shorten_features, map(str, comp_vars)))}_ale_comp",
            sub_directory="ale_comp",
        )
def plot_2d_ale(experiment, single=False, nargs=None, verbose=False, **kwargs):
    """Save 2D ALE plots for pairs of training columns of one experiment.

    All 2-combinations of `X_train.columns` are sorted deterministically and,
    for each combination, `save_ale_2d` is called once with
    `plot_samples=True` and once with `plot_samples=False`.

    Args:
        experiment: Experiment whose split data and fitted model are used.
        single: If true, only the first (combination, plot_samples) item is
            processed.
        nargs: If truthy (and `single` is false), the number of items to
            process.
        verbose: Enables the progress bar.
        **kwargs: Ignored.
    """
    # NOTE(review): the arguments of this call were missing from the damaged
    # source line; `sub_directory=experiment.name` is an assumed
    # reconstruction - confirm against file history.
    exp_figure_saver = figure_saver(sub_directory=experiment.name)

    # Operate on cached data only.
    get_experiment_split_data.check_in_store(experiment)
    X_train, X_test, y_train, y_test = get_experiment_split_data(experiment)

    # Operate on cached fitted models only.
    get_model(X_train, y_train, cache_check=True)
    model = get_model(X_train, y_train)

    columns_list = list(combinations(X_train.columns, 2))

    # Deterministic sorting with FAPAR & FAPAR 1M and FAPAR & DRY_DAY_PERIOD at the
    # front since these are used in the paper.
    def get_combination_value(column_combination):
        """Return the integer sort key of a column combination."""
        # Handle special cases first.
        if variable.FAPAR[0] in column_combination:
            if variable.FAPAR[1] in column_combination:
                return -1000
            if variable.DRY_DAY_PERIOD[0] in column_combination:
                return -999
        # Concatenate the rank and shift digits of every variable.
        return int(
            "".join(str(var.rank) + str(var.shift) for var in column_combination)
        )

    columns_list = sorted(columns_list, key=get_combination_value)

    def param_iter():
        """Yield `(columns, plot_samples)` for every combination."""
        for columns in columns_list:
            for plot_samples in [True, False]:
                yield columns, plot_samples

    if single:
        total = 1
    elif nargs:
        total = nargs
    else:
        total = 2 * len(columns_list)

    for columns, plot_samples in tqdm(
        islice(param_iter(), None, total),
        desc=f"2D ALE plotting ({experiment})",
        total=total,
        disable=not verbose,
    ):
        save_ale_2d(
            experiment=experiment,
            model=model,
            train_set=X_train,
            features=columns,
            n_jobs=get_ncpus(),
            include_first_order=True,
            plot_samples=plot_samples,
            figure_saver=exp_figure_saver,
            ale_factor_exp=plotting_configuration.ale_factor_exps.get(
                (columns[0].parent, columns[1].parent), -2
            ),
            x_factor_exp=plotting_configuration.factor_exps.get(columns[0].parent, 0),
            x_ndigits=plotting_configuration.ndigits.get(columns[0].parent, 2),
            y_factor_exp=plotting_configuration.factor_exps.get(columns[1].parent, 0),
            y_ndigits=plotting_configuration.ndigits.get(columns[1].parent, 2),
        )
        # Close all figures after each plot has been saved.
        plt.close("all")
def param_iter():
    """Yield ``(exclude_inst, variable_factory)`` combinations.

    The outer loop runs over the FAPAR and DRY_DAY_PERIOD variable factories,
    the inner loop over ``exclude_inst`` being False and then True. Both
    loops are wrapped in `tqdm` progress bars.
    """
    factories = [variable.FAPAR, variable.DRY_DAY_PERIOD]
    for variable_factory in tqdm(factories, desc="Feature"):
        # A fresh inner progress bar is created for every feature.
        inst_bar = tqdm([False, True], desc="Exclude inst.")
        for exclude_inst in inst_bar:
            yield exclude_inst, variable_factory
def plot_multi_ale(experiment, verbose=False, **kwargs):
    """Plot multi-lag 1D ALE curves for a vegetation variable and DRY_DAY_PERIOD.

    Two panels are created: (a) the single vegetation variable present in the
    training columns and (b) `variable.DRY_DAY_PERIOD`, each for the first 5
    entries of `variable.lags`. The DRY_DAY_PERIOD data is drawn again on an
    inset axis of panel (b) with reduced axis limits, and a shared legend is
    placed between the two panels before the figure is saved.

    Args:
        experiment: Experiment whose split data and fitted model are used.
        verbose: Passed on to `multi_ale_1d`.
        **kwargs: Ignored.

    Raises:
        ValueError: If none, or more than one, of the expected vegetation
            variables are found in the training columns.
    """
    # NOTE(review): the arguments of this call were missing from the damaged
    # source line; `sub_directory=experiment.name` is an assumed
    # reconstruction - confirm against file history.
    exp_figure_saver = figure_saver(sub_directory=experiment.name)

    # Operate on cached data only.
    get_experiment_split_data.check_in_store(experiment)
    X_train, X_test, y_train, y_test = get_experiment_split_data(experiment)

    # Operate on cached fitted models only.
    get_model(X_train, y_train, cache_check=True)
    model = get_model(X_train, y_train)

    fig, axes = plt.subplots(1, 2, figsize=(7.05, 2.8))

    expected_veg = tuple(
        map(
            itemgetter(0),
            variable.feature_categories[variable.Category.VEGETATION],
        )
    )
    matched = [f for f in expected_veg if f in X_train.columns]

    if not matched:
        raise ValueError(f"Could not find one of {expected_veg} in {X_train.columns}.")
    elif len(matched) > 1:
        raise ValueError(
            f"Found more than one of {tuple(map(str, expected_veg))} in "
            f"{X_train.columns}: {matched}"
        )

    features = (matched[0].parent, variable.DRY_DAY_PERIOD)

    ale_factor_exp = -3
    x_factor_exp = 0

    for feature_factory, ax, title in zip(
        tqdm(features, desc="Processing features"),
        axes,
        ("(a)", "(b)"),
    ):
        multi_ale_1d(
            model=model,
            X_train=X_train,
            features=[feature_factory[lag] for lag in variable.lags[:5]],
            train_response=y_train,
            fig=fig,
            ax=ax,
            verbose=verbose,
            monte_carlo_rep=100,
            monte_carlo_ratio=get_frac_train_nr_samples(Experiment["15VEG_FAPAR"], 0.1),
            legend=False,
            ale_factor_exp=ale_factor_exp,
            x_factor_exp=x_factor_exp,
            x_ndigits=plotting_configuration.ndigits.get(feature_factory, 2),
            x_skip=4,
            x_rotation=0,
        )
        ax.set_title(title)
        ax.set_xlabel(
            f"{shorten_features(str(feature_factory))} ({variable.units[feature_factory]})"
            if x_factor_exp == 0
            else (
                f"{feature_factory} ($10^{{{x_factor_exp}}}$ "
                f"{variable.units[feature_factory]})"
            ),
        )

    axes[1].set_ylabel("")

    # Inset axis to pronounce low-DD features.
    # The transform is taken from `axes[1]` explicitly; this is the same axis
    # the loop variable of the loop above was left pointing at.
    ax2 = inset_axes(
        axes[1],
        width=2.155,
        height=1.55,
        loc="lower left",
        bbox_to_anchor=(0.019, 0.225),
        bbox_transform=axes[1].transAxes,
    )
    # Plot the DD data again on the inset axis.
    multi_ale_1d(
        model=model,
        X_train=X_train,
        features=[features[1][lag] for lag in variable.lags[:5]],
        train_response=y_train,
        fig=fig,
        ax=ax2,
        verbose=verbose,
        monte_carlo_rep=100,
        monte_carlo_ratio=get_frac_train_nr_samples(Experiment["15VEG_FAPAR"], 0.1),
        legend=False,
        ale_factor_exp=ale_factor_exp,
    )

    ax2.set_xlim(0, 17.5)
    ax2.set_ylim(-1.5e-3, 2e-3)
    ax2.xaxis.set_major_formatter(ticker.ScalarFormatter())
    ax2.yaxis.set_major_formatter(ticker.ScalarFormatter())

    # Hide ticks, tick labels and axis labels on the inset; keep the grid.
    ax2.tick_params(axis="both", which="both", length=0)
    plt.setp(ax2.get_xticklabels(), visible=False)
    plt.setp(ax2.get_yticklabels(), visible=False)
    ax2.set_ylabel("")
    ax2.set_xlabel("")
    ax2.grid(True)

    mark_inset(axes[1], ax2, loc1=4, loc2=2, fc="none", ec="0.3")

    # Move the first (left) axis to the right.
    orig_bbox = axes[0].get_position()
    axes[0].set_position(
        [orig_bbox.xmin + 0.021, orig_bbox.ymin, orig_bbox.width, orig_bbox.height]
    )

    # Explicitly set the x-axis labels' positions so they line up horizontally.
    # The lowest axis bottom edge is used, capped at 1.
    y_min = min(1, *(ax.get_position().ymin for ax in axes))
    for ax in axes:
        bbox = ax.get_position()
        mean_x = (bbox.xmin + bbox.xmax) / 2.0
        # NOTE - Decrease the negative offset to move the label upwards.
        ax.xaxis.set_label_coords(mean_x, y_min - 0.1, transform=fig.transFigure)

    # Plot the legend in between the two axes.
    axes[1].legend(
        loc="center",
        ncol=5,
        bbox_to_anchor=(
            np.mean(
                [
                    axes[0].get_position().xmax,
                    axes[1].get_position().xmin,
                ]
            ),
            0.932,
        ),
        bbox_transform=fig.transFigure,
        handletextpad=0.25,
        columnspacing=0.5,
    )

    # NOTE(review): the first f-string field was empty in the damaged source
    # line; `experiment.name` is an assumed reconstruction - confirm.
    exp_figure_saver.save_figure(
        fig,
        f'{experiment.name}_{"__".join(map(shorten_features, map(str, features)))}_ale_shifts',
        sub_directory="multi_ale",
        transparent=False,
    )
args = [[], [], []] experiments = list(Experiment) cmd_args = get_parsers()["parser"].parse_args() if cmd_args.experiment is not None: chosen_experiments = expand_experiment_strs(cmd_args.experiment) else: chosen_experiments = experiments.copy() chosen_experiments = chosen_experiments[:1 if cmd_args.single else None] run_experiments = [] for experiment in tqdm( chosen_experiments, desc="Determining run-experiments", disable=not cmd_args.verbose, ): try: # Check if a full cache is already present. # Operate on cached data only. get_experiment_split_data.check_in_store(experiment) X_train, X_test, y_train, y_test = get_experiment_split_data( experiment) # Operate on cached fitted models only. get_model(X_train, y_train, cache_check=True) rf = get_model(X_train, y_train) get_shap_values.check_in_store(rf, X_train) get_shap_values.check_in_store(rf, X_test)
stacked_shaps = np.vstack( [[np.newaxis] for data in selected_data]) # Calculate the significance of the global maxima for each of the valid pixels. # Valid indices are recorded in 'shared_mask'. valid_i, valid_j = np.where(~shared_mask) total_valid = len(valid_i) peak_indices = [] for i, j in zip( tqdm(valid_i, desc="Evaluating maxima", smoothing=0, disable=verbose < 3), valid_j, ): ptp_threshold = ptp_threshold_factor * mean_ba[i, j] peaks_i = significant_peak( stacked_shaps[:, i, j], diff_threshold=diff_threshold, ptp_threshold=ptp_threshold, strict=False, ) # Adding information about the sign of the mean influence, sorted by time. peak_indices.append( tuple( f"{filtered_lags[p_i]}({'+' if stacked_shaps[p_i, i, j] > 0 else '-'})"
# Parse the command line and build the per-call argument lists for
# `plot_1d_ale`: `args[0]` holds the experiment and `args[1]` the column of
# each call, so both lists must grow by exactly one entry per column.
cmd_args = get_parsers()["parser"].parse_args()

if cmd_args.experiment is not None:
    # Resolve the requested names once instead of once per candidate; the
    # order of `experiments` is kept.
    requested_experiments = tuple(Experiment[name] for name in cmd_args.experiment)
    chosen_experiments = [exp for exp in experiments if exp in requested_experiments]
else:
    chosen_experiments = experiments.copy()

# Only the first experiment is kept if `--single`-style behaviour is requested.
chosen_experiments = chosen_experiments[: 1 if cmd_args.single else None]

for experiment in tqdm(
    chosen_experiments,
    desc="Preparing ALE 1D arguments",
    disable=not cmd_args.verbose,
):
    # Operate on cached data / models only.
    get_experiment_split_data.check_in_store(experiment)
    X_train, X_test, y_train, y_test = get_experiment_split_data(experiment)
    get_model(X_train, y_train, cache_check=True)

    for column in X_train.columns:
        args[0].append(experiment)
        # `append`, not `extend`: `extend` would iterate over the column
        # itself and break the one-to-one pairing with `args[0]`.
        args[1].append(column)

run(plot_1d_ale, *args, cx1_kwargs=cx1_kwargs)