def get_y_normalization_scales(dataset, gap_data=False):
    """Return the per-split target-normalization scales for a UCI dataset.

    For every train/test split of ``dataset``, fits a ``StandardPreprocessor``
    on that split's (unprocessed) training targets and records the fitted
    standard-deviation scale.

    Parameters
    ----------
    dataset : str
        Name of the UCI dataset, passed through to ``load_uci_data``.
    gap_data : bool, optional
        Whether to load the "gap" variant of the splits.

    Returns
    -------
    np.ndarray
        Array of the fitted ``scaler.scale_`` values, one entry per split.
    """
    # `dataset` is already a string; the original wrapped it in a pointless
    # f-string.  Iterate the split index arrays directly instead of
    # `range(len(...))`.
    _x, _y, train_indices, _, test_indices = load_uci_data(dataset, gap_data=gap_data)
    y_normalization_scales = []
    for split in train_indices:
        _y_train = _y[split].reshape(-1, 1)
        y_preprocessor = StandardPreprocessor()
        y_preprocessor.fit(_y_train)
        y_normalization_scales.append(y_preprocessor.scaler.scale_)
    return np.array(y_normalization_scales)
# %% dataset = "yacht" data_seed = 0 dataset_name = f"{dataset}_{data_seed + 1:02}" n_hidden_layers = 1 hidden_layers_string = ( "two-hidden-layers" if n_hidden_layers == 2 else "one-hidden-layer" ) save_dir = f".save_uci_models/hmc-map-ensemble-comparison/{hidden_layers_string}/{dataset_name}" save_dir = Path(save_dir) save_dir.mkdir(parents=True, exist_ok=True) # train and test variables beginning with an underscore are unprocessed. _x, _y, train_indices, _, test_indices = load_uci_data(f"{dataset}") _x_train = _x[train_indices[data_seed]] _y_train = _y[train_indices[data_seed]].reshape(-1, 1) _x_test = _x[test_indices[data_seed]] _y_test = _y[test_indices[data_seed]].reshape(-1, 1) x_preprocessor = StandardPreprocessor() x_train = x_preprocessor.fit_transform(_x_train) x_test = x_preprocessor.transform(_x_test) y_preprocessor = StandardPreprocessor() y_train = y_preprocessor.fit_transform(_y_train) y_test = y_preprocessor.transform(_y_test) unnormalized_ll_constant = np.log(y_preprocessor.scaler.scale_) # %%
use_gap_data=False, train_seed=0, validation_split=0.0, weight_prior_scale=weight_prior_scale, bias_prior_scale=weight_prior_scale, model_kwargs=kwargs, fit_kwargs=fit_kwargs, fit_kwargs_list=None, ensemble_predict_moment_matched=False # If True the NLL will also be computed for # the moment matched predictive distribution and not just the mixture predictive # distribution. ) # %% x, y, train_indices, validation_indices, test_indices = load_uci_data( dataset="boston", validation_split=0.0, gap_data=False ) dataset_splits_ensemble_rmses = [] dataset_splits_single_network_rmses = [] dataset_splits_ensemble_negative_log_likelihoods = [] dataset_splits_single_network_negative_log_likelihoods = [] for i, train, test in zip(range(len(train_indices)), train_indices, test_indices,): x_train = x[train] x_test = x[test] y_train = y[train].reshape(-1, 1) y_test = y[test].reshape(-1, 1) predictive_distribution = ensemble_models[i].predict(x_train) ensemble_rmse = calculate_rmse(predictive_distribution.mean(), y_train) ensemble_negative_log_likelihood = -tf.reduce_mean( predictive_distribution.log_prob(y_train)
def evaluate_uci(
    model,
    dataset="boston",
    use_gap_data=False,
    data_seed=0,
    train_seed=0,
    validation_size=0.0,
    model_kwargs=None,
    fit_kwargs=None,
    verbose=0,
    ensemble_predict_moment_matched=False,  # If True the NLL will also be computed
    # for the moment matched predictive distribution and not just the mixture
    # predictive distribution.
):
    """Train `model` on one UCI split and evaluate RMSE and NLL on the test set.

    Parameters
    ----------
    model : class or instance
        A model class (instantiated here with ``model_kwargs`` and
        ``seed=train_seed``) or an already-constructed model.
    dataset : str
        UCI dataset name passed to ``load_uci_data``.
    use_gap_data : bool
        Whether to use the "gap" splits.
    data_seed : int
        Index of the train/validation/test split to use.
    train_seed : int
        Seed handed to the model constructor.
    validation_size : float
        Fraction used for validation; 0 disables validation entirely.
    model_kwargs, fit_kwargs : dict or None
        Extra keyword arguments for construction / fitting.  ``None`` means
        empty — mutable defaults are avoided on purpose, and the caller's
        dicts are never mutated.
    verbose : int
        Passed through to ``model.fit``.
    ensemble_predict_moment_matched : bool
        Also evaluate the moment-matched Gaussian predictive distribution.

    Returns
    -------
    tuple
        ``(rmse, nll, model, total_epochs)``, or with an additional
        moment-matched NLL inserted when ``ensemble_predict_moment_matched``.
    """
    # Copy before mutating ("input_shape", "kl_weight" are written below) so a
    # caller-supplied dict is left untouched; also fixes the shared
    # mutable-default-argument bug of `model_kwargs={}`.
    model_kwargs = dict(model_kwargs) if model_kwargs else {}
    fit_kwargs = fit_kwargs if fit_kwargs is not None else {}
    x, y, train_indices, validation_indices, test_indices = load_uci_data(
        dataset, validation_size, gap_data=use_gap_data
    )
    model_kwargs["input_shape"] = [x.shape[1]]
    x_train = x[train_indices[data_seed]]
    x_validation = x[validation_indices[data_seed]]
    x_test = x[test_indices[data_seed]]
    y_train = y[train_indices[data_seed]].reshape(-1, 1)
    y_validation = y[validation_indices[data_seed]].reshape(-1, 1)
    y_test = y[test_indices[data_seed]].reshape(-1, 1)
    if model == VariationalDensityNetwork:
        # Variational models scale the KL term by the training-set size.
        model_kwargs["kl_weight"] = 1 / x_train.shape[0]
    if inspect.isclass(model):
        model = model(**model_kwargs, seed=train_seed)
    validation_data = (x_validation, y_validation)
    if validation_size == 0:
        validation_data = None
    model.fit(
        x_train,
        y_train,
        **fit_kwargs,
        validation_data=validation_data,
        verbose=verbose,
    )
    if validation_data is not None:
        # Sanity check: recompute the validation NLL directly from the
        # predictive distribution.  Guarded so an empty validation set does
        # not produce a meaningless nan print.
        predictive_distribution = model.predict(x_validation)
        print(
            "validation loss by hand",
            -tf.reduce_mean(predictive_distribution.log_prob(y_validation)),
        )
    total_epochs = model.total_epochs
    predictive_distribution = model.predict(x_test)
    rmse = calculate_rmse(predictive_distribution.mean(), y_test)
    negative_log_likelihood = -tf.reduce_mean(
        predictive_distribution.log_prob(y_test)
    )
    if ensemble_predict_moment_matched:
        predictive_distribution = model.predict_moment_matched_gaussian(x_test)
        # `np.float` was removed in NumPy 1.24; the builtin `float` is the
        # documented replacement.
        rmse_mm = (
            calculate_rmse(predictive_distribution.mean(), y_test)
            .numpy()
            .astype(float)
        )
        # The moment-matched mean equals the mixture mean, so the RMSEs must
        # agree up to floating-point error (exact `==` is too strict).
        assert np.isclose(rmse_mm, rmse)
        mm_negative_log_likelihood = (
            -tf.reduce_mean(predictive_distribution.log_prob(y_test))
            .numpy()
            .astype(float)
        )
        return (
            rmse,
            negative_log_likelihood,
            mm_negative_log_likelihood,
            model,
            total_epochs,
        )
    return rmse, negative_log_likelihood, model, total_epochs
def _collect_ensemble_size_metrics(models, x, y, test_indices, net_sizes):
    """Evaluate each fitted ensemble at every size in `net_sizes`.

    Returns three parallel lists (RMSEs, NLLs, moment-matched NLLs), each
    indexed as ``[size][split]``.
    """
    rmses, nlls, mm_nlls = [], [], []
    for size in net_sizes:
        size_rmses, size_nlls, size_mm_nlls = [], [], []
        for split, model in enumerate(models):
            x_test = x[test_indices[split]]
            y_test = y[test_indices[split]].reshape(-1, 1)
            rmse, nll, mm_nll = evaluate_ensemble_size(
                model, x_test, y_test, size=size, seed=0
            )
            size_rmses.append(rmse)
            size_nlls.append(nll)
            size_mm_nlls.append(mm_nll)
        rmses.append(size_rmses)
        nlls.append(size_nlls)
        mm_nlls.append(size_mm_nlls)
    return rmses, nlls, mm_nlls


def uci_benchmark_ensemble_sizes_save_plot(
    experiment_name,
    model_save_dir=None,
    dataset="boston",
    use_gap_data=False,
    train_seed=0,
    layer_units=None,
    layer_activations=None,
    initial_unconstrained_scale=-1,
    transform_unconstrained_scale_factor=0.5,
    learning_rate=0.01,
    epochs=40,
    batch_size=100,
    n_networks=5,
    early_stop_callback=None,
    weight_prior_scale=None,
    bias_prior_scale=None,
    weight_prior=None,
    bias_prior=None,
    noise_scale_prior=None,
    last_layer_prior="non-informative",
    last_layer_prior_params=None,
    validation_split=0.0,
    save=True,  # whether to save the results dict
    verbose=False,
):
    """Benchmark MAP ensembles and LLB ensembles over varying ensemble sizes.

    Trains (or loads from ``model_save_dir``) a ``MapDensityEnsemble`` per
    UCI split, evaluates RMSE/NLL for increasing ensemble sizes, then reuses
    the trained networks as pretrained feature extractors for an
    ``LLBEnsemble`` and repeats the evaluation.  Optionally saves the results
    dict and plots the NLL-vs-size curves.

    Returns
    -------
    dict
        ``{"Ensemble": {...}, "LLB Ensemble": {...}}`` with keys
        ``"RMSEs"``, ``"NLLs"``, ``"MM-NLLs"`` (lists indexed [size][split]).
    """
    # Avoid mutable default arguments for the list-valued parameters.
    if layer_units is None:
        layer_units = [50, 1]
    if layer_activations is None:
        layer_activations = ["relu", "linear"]
    tf.keras.backend.clear_session()
    fit_kwargs = {
        "epochs": epochs,
        "batch_size": batch_size,
        "early_stop_callback": early_stop_callback,
    }
    # All hidden layers are anonymous except the last two, which downstream
    # code addresses by name.
    layer_names = [None] * (len(layer_units) - 2) + ["feature_extractor", "output"]
    model_kwargs = {
        "layer_units": layer_units,
        "layer_activations": layer_activations,
        "initial_unconstrained_scale": initial_unconstrained_scale,
        "transform_unconstrained_scale_factor": transform_unconstrained_scale_factor,
        "weight_prior": weight_prior,
        "bias_prior": bias_prior,
        "noise_scale_prior": noise_scale_prior,
        "preprocess_x": True,
        "preprocess_y": True,
        "learning_rate": learning_rate,
        "names": layer_names,
        "n_networks": n_networks,
    }
    if model_save_dir is not None:
        # Cache trained ensembles on disk, keyed by experiment/dataset/gap.
        model_save_dir = Path(model_save_dir)
        model_save_dir.mkdir(parents=True, exist_ok=True)
        ensemble_save_path = model_save_dir.joinpath(
            f"uci_benchmark_ensemble_sizes_{experiment_name}_{dataset}_gap-{use_gap_data}"
        )
        if ensemble_save_path.is_dir():
            print("Loading ensemble from disk")
            ensemble_models = load_models(
                ensemble_save_path,
                load_model_function=map_density_ensemble_from_save_path,
            )
        else:
            (_, _, ensemble_models, _, _) = kfold_evaluate_uci(
                dataset=dataset,
                use_gap_data=use_gap_data,
                model_class=MapDensityEnsemble,
                train_seed=train_seed,
                validation_split=validation_split,
                weight_prior_scale=weight_prior_scale,
                bias_prior_scale=bias_prior_scale,
                model_kwargs=model_kwargs,
                fit_kwargs=fit_kwargs,
                verbose=verbose,
            )
            save_models(ensemble_save_path, ensemble_models)
    else:
        (_, _, ensemble_models, total_epochs, _) = kfold_evaluate_uci(
            dataset=dataset,
            use_gap_data=use_gap_data,
            model_class=MapDensityEnsemble,
            train_seed=train_seed,
            validation_split=validation_split,
            weight_prior_scale=weight_prior_scale,
            bias_prior_scale=bias_prior_scale,
            model_kwargs=model_kwargs,
            fit_kwargs=fit_kwargs,
            verbose=verbose,
        )
        print(total_epochs)
    print("Done Ensemble Training.")
    x, y, train_indices, validation_indices, test_indices = load_uci_data(
        dataset, validation_split=validation_split, gap_data=use_gap_data
    )
    results = {}
    # Only test for network sizes below 20 or divisible by 5.
    _net_sizes = np.arange(n_networks) + 1
    _net_sizes = _net_sizes[np.logical_or(_net_sizes <= 20, _net_sizes % 5 == 0)]
    rmses, nlls, mm_nlls = _collect_ensemble_size_metrics(
        ensemble_models, x, y, test_indices, _net_sizes
    )
    results["Ensemble"] = {"RMSEs": rmses, "NLLs": nlls, "MM-NLLs": mm_nlls}
    print("Done Ensemble Testing.")
    # Reuse the trained MAP networks as pretrained feature extractors for the
    # last-layer-Bayesian (LLB) ensembles; prior-related kwargs do not apply.
    model_kwargs.pop("names")
    model_kwargs.pop("weight_prior")
    model_kwargs.pop("bias_prior")
    model_kwargs.pop("noise_scale_prior")
    model_kwargs["last_layer_prior"] = last_layer_prior
    model_kwargs["last_layer_prior_params"] = last_layer_prior_params
    llb_ensemble_fit_kwargs_list = [
        {"pretrained_networks": model.networks} for model in ensemble_models
    ]
    _, _, llb_ensemble_models, _, _ = kfold_evaluate_uci(
        dataset=dataset,
        use_gap_data=use_gap_data,
        model_class=LLBEnsemble,
        train_seed=train_seed,
        validation_split=validation_split,
        model_kwargs=model_kwargs,
        fit_kwargs_list=llb_ensemble_fit_kwargs_list,
    )
    print("Done LLB Ensemble Training.")
    rmses, nlls, mm_nlls = _collect_ensemble_size_metrics(
        llb_ensemble_models, x, y, test_indices, _net_sizes
    )
    results["LLB Ensemble"] = {"RMSEs": rmses, "NLLs": nlls, "MM-NLLs": mm_nlls}
    if save:
        save_results(
            experiment_name,
            dataset,
            results,
            use_gap_data=use_gap_data,
            sub_folder="ensemble_sizes",
        )
    # e_rmses = results["Ensemble"]["RMSEs"]
    e_nlls = results["Ensemble"]["NLLs"]
    e_mm_nlls = results["Ensemble"]["MM-NLLs"]
    # llbe_rmses = results["LLB Ensemble"]["RMSEs"]
    llbe_nlls = results["LLB Ensemble"]["NLLs"]
    llbe_mm_nlls = results["LLB Ensemble"]["MM-NLLs"]
    labels = ["Ensemble MM", "Ensemble", "LLB Ensemble MM", "LLB Ensemble"]
    colors = sns.color_palette()
    with open("config/uci-color-config.yaml") as f:
        color_mapping = yaml.full_load(f)
    plot_uci_ensemble_size_benchmark(
        [e_mm_nlls, e_nlls, llbe_mm_nlls, llbe_nlls],
        labels=labels,
        title=dataset,
        x_label="# ensemble_members",  # fixed typo "memebers"
        y_label="Negative Log Likelihood",
        colors=[colors[color_mapping[method]] for method in labels],
    )
    return results
def kfold_evaluate_uci(
    model_class,
    dataset="boston",
    use_gap_data=False,
    train_seed=0,
    validation_split=0.0,
    weight_prior_scale=None,
    bias_prior_scale=None,
    model_kwargs=None,
    fit_kwargs=None,
    fit_kwargs_list=None,
    ensemble_predict_moment_matched=False,  # If True the NLL will also be computed
    # for the moment matched predictive distribution and not just the mixture
    # predictive distribution.
    verbose=False,
):
    """Train and evaluate `model_class` on every train/test split of a UCI dataset.

    Parameters
    ----------
    model_class : class
        Model constructor; called with ``model_kwargs`` and a per-split seed.
    dataset, use_gap_data, validation_split
        Forwarded to ``load_uci_data``.
    train_seed : int
        Base seed; split ``i`` uses ``train_seed + i`` for shuffling and
        model construction.
    weight_prior_scale, bias_prior_scale : float or None
        If given, converted to l2 regularization lambdas for the model.
    model_kwargs, fit_kwargs : dict or None
        Extra keyword arguments; ``None`` means empty.  Caller dicts are
        copied, never mutated.
    fit_kwargs_list : list[dict] or None
        Per-split fit kwargs; overrides ``fit_kwargs`` when given.
    ensemble_predict_moment_matched : bool
        Also evaluate the moment-matched Gaussian predictive distribution.
    verbose : bool
        Print progress per split.

    Returns
    -------
    tuple
        ``(rmses, nlls, models, total_epochs, fit_times)``, with an extra
        moment-matched-NLL list inserted when
        ``ensemble_predict_moment_matched`` is True.
    """
    # Copy to fix the shared mutable-default bug (`model_kwargs={}`) and to
    # avoid mutating caller-supplied dicts: the original wrote "input_shape",
    # "kl_weight", "n_train" and the l2 lambdas straight into them.
    model_kwargs = dict(model_kwargs) if model_kwargs else {}
    fit_kwargs = dict(fit_kwargs) if fit_kwargs else {}
    x, y, train_indices, validation_indices, test_indices = load_uci_data(
        dataset, validation_split=validation_split, gap_data=use_gap_data
    )
    model_kwargs["input_shape"] = [x.shape[1]]
    rmses = []
    negative_log_likelihoods = []
    models = []
    total_epochs = []
    fit_times = []
    if ensemble_predict_moment_matched:
        mm_negative_log_likelihoods = []
    if fit_kwargs_list is None:
        fit_kwargs_list = [fit_kwargs] * len(train_indices)
    for i, (train, validation, test, split_fit_kwargs) in enumerate(
        zip(train_indices, validation_indices, test_indices, fit_kwargs_list)
    ):
        x_train = x[train]
        x_validation = x[validation]
        x_test = x[test]
        y_train = y[train].reshape(-1, 1)
        y_validation = y[validation].reshape(-1, 1)
        y_test = y[test].reshape(-1, 1)
        validation_data = (x_validation, y_validation)
        # Deterministic per-split shuffling.
        x_train, y_train = shuffle(x_train, y_train, random_state=train_seed + i)
        if validation.size == 0:
            validation_data = None
        n_train = x_train.shape[0]
        # Fresh per-split copy, so split-specific entries ("kl_weight",
        # "n_train", the l2 lambdas) never leak between splits or back to the
        # caller — this replaces the original's mutate-then-pop dance.
        split_model_kwargs = dict(model_kwargs)
        if model_class == VariationalDensityNetwork:
            # Variational models scale the KL term by the training-set size.
            split_model_kwargs["kl_weight"] = 1 / n_train
        if (
            "weight_prior" in split_model_kwargs
            or "noise_scale_prior" in split_model_kwargs
        ):
            split_model_kwargs["n_train"] = n_train
        if weight_prior_scale is not None:
            split_model_kwargs["l2_weight_lambda"] = (
                prior_scale_to_regularization_lambda(weight_prior_scale, n_train)
            )
        if bias_prior_scale is not None:
            split_model_kwargs["l2_bias_lambda"] = (
                prior_scale_to_regularization_lambda(bias_prior_scale, n_train)
            )
        model = model_class(**split_model_kwargs, seed=train_seed + i)
        start = time.time()
        model.fit(
            x_train,
            y_train,
            **split_fit_kwargs,
            validation_data=validation_data,
            verbose=0,
        )
        fit_times.append(time.time() - start)
        total_epochs.append(model.total_epochs)
        predictive_distribution = model.predict(x_test)
        # `np.float` was removed in NumPy 1.24; the builtin `float` is the
        # documented replacement.
        rmses.append(
            calculate_rmse(predictive_distribution.mean(), y_test)
            .numpy()
            .astype(float)
        )
        negative_log_likelihoods.append(
            -tf.reduce_mean(predictive_distribution.log_prob(y_test))
            .numpy()
            .astype(float)
        )
        if ensemble_predict_moment_matched:
            predictive_distribution = model.predict_moment_matched_gaussian(x_test)
            rmse_mm = (
                calculate_rmse(predictive_distribution.mean(), y_test)
                .numpy()
                .astype(float)
            )
            # The moment-matched mean equals the mixture mean, so the RMSEs
            # must agree up to floating-point error (exact `==` is too strict).
            assert np.isclose(rmse_mm, rmses[-1])
            mm_negative_log_likelihoods.append(
                -tf.reduce_mean(predictive_distribution.log_prob(y_test))
                .numpy()
                .astype(float)
            )
        models.append(model)
        if verbose:
            print(f"Done Split {i}")
    if ensemble_predict_moment_matched:
        return (
            rmses,
            negative_log_likelihoods,
            mm_negative_log_likelihoods,
            models,
            total_epochs,
            fit_times,
        )
    return rmses, negative_log_likelihoods, models, total_epochs, fit_times