def get_y_normalization_scales(dataset, gap_data=False):
    """Collect the target-normalization scale of every train split of a dataset.

    For each train split, a fresh ``StandardPreprocessor`` is fitted on that
    split's targets (reshaped to a single column) and the ``scale_`` attribute
    of its ``scaler`` is recorded.

    Args:
        dataset: Dataset identifier; formatted into a string and handed to
            ``load_uci_data``.
        gap_data: Forwarded unchanged to ``load_uci_data``.

    Returns:
        ``np.ndarray`` built from the per-split ``scale_`` values, in split
        order.
    """
    # Only the targets and the per-split train indices are used here; the
    # inputs and the remaining index collections are discarded.
    _, targets, train_indices, _, _ = load_uci_data(
        f"{dataset}", gap_data=gap_data
    )

    def _fitted_scale(split):
        # Fit on this split's training targets only, as a column vector.
        split_targets = targets[train_indices[split]].reshape(-1, 1)
        fitted = StandardPreprocessor()
        fitted.fit(split_targets)
        return fitted.scaler.scale_

    n_splits = len(train_indices)
    return np.array([_fitted_scale(split) for split in range(n_splits)])
from data.toy_regression import cos_linear_edge_data, ground_truth_cos_function

tfd = tfp.distributions

# Output directory for this experiment's figures; created on first run.
figure_dir = Path("figures/linear_outer_edges_uncertainty")
figure_dir.mkdir(parents=True, exist_ok=True)
experiment_name = "linear-outer-edge"

# %%
seed = 1
n_train = 50

# train and test variables beginning with an underscore are unprocessed.
_x_train, y_train = cos_linear_edge_data(
    n_data=n_train,
    sigma=0.1,
    seed=seed,
)
preprocessor = StandardPreprocessor()
x_train, _x_plot, x_plot = preprocessor.preprocess_create_x_train_x_plot(
    _x_train,
    test_ds=0.8,
)
# The ground truth is evaluated on the unprocessed plotting inputs.
y_ground_truth = ground_truth_cos_function(_x_plot)

# Layer widths; the last entry (2) is the width of the final layer.
layer_units = [50, 20, 2]
# "relu" for every layer except the last, which is "linear".
layer_activations = ["relu" for _ in layer_units[:-1]] + ["linear"]

# %% codecell
y_lim = [-5, 5]
figsize = (10, 6)
fig, ax = plt.subplots(figsize=figsize)
# Draw the raw training points and the ground-truth curve on the same axes.
plot_training_data(_x_train, y_train, fig=fig, ax=ax, y_lim=y_lim)
plot_ground_truth(_x_plot, y_ground_truth, fig=fig, ax=ax, alpha=0.2)
ax.set_xlabel("x")
ax.set_ylabel("y")
# Sub-directory name depends on whether the network has exactly two hidden
# layers; any other count maps to the one-hidden-layer directory.
if n_hidden_layers == 2:
    hidden_layers_string = "two-hidden-layers"
else:
    hidden_layers_string = "one-hidden-layer"

# NOTE(review): the save path uses `dataset_name` while the data is loaded
# with `dataset` below -- confirm both are meant to be distinct variables.
save_dir = Path(
    f".save_uci_models/hmc-map-ensemble-comparison/{hidden_layers_string}/{dataset_name}"
)
save_dir.mkdir(parents=True, exist_ok=True)

# train and test variables beginning with an underscore are unprocessed.
_x, _y, train_indices, _, test_indices = load_uci_data(f"{dataset}")

# Select the split identified by `data_seed`; targets become column vectors.
_x_train = _x[train_indices[data_seed]]
_x_test = _x[test_indices[data_seed]]
_y_train = _y[train_indices[data_seed]].reshape(-1, 1)
_y_test = _y[test_indices[data_seed]].reshape(-1, 1)

# Both preprocessors are fitted on the training split only and then applied
# to the test split.
x_preprocessor = StandardPreprocessor()
x_train = x_preprocessor.fit_transform(_x_train)
x_test = x_preprocessor.transform(_x_test)

y_preprocessor = StandardPreprocessor()
y_train = y_preprocessor.fit_transform(_y_train)
y_test = y_preprocessor.transform(_y_test)

# Log of the fitted target scale.
# NOTE(review): presumably used to convert log likelihoods of normalized
# targets back to the original target units -- confirm at the use site.
unnormalized_ll_constant = np.log(y_preprocessor.scaler.scale_)

# %%
# NOTE(review): `yaml.full_load` resolves more tags than `yaml.safe_load`;
# consider `safe_load` if this config only holds plain scalars/lists/dicts.
with open("config/uci-hyperparameters-config.yaml") as f:
    experiment_config = yaml.full_load(f)

train_seed = experiment_config["train_seed"]
layer_units = experiment_config["layer_units"]
layer_activations = experiment_config["layer_activations"]
# %%
data_seed = 0
n_train = 20

# train and test variables beginning with an underscore are unprocessed.
# NOTE(review): the generator below is seeded with a literal 42 rather than
# `data_seed` -- confirm this is intentional.
_x_train, _y_train = create_split_periodic_data_heteroscedastic(
    n_data=n_train,
    lower1=-1,
    upper1=0,
    lower2=1,
    upper2=2,
    sigma1=0.2,
    sigma2=0.2,
    seed=42,
)
preprocessor = StandardPreprocessor()
x_train, _x_plot, x_plot = preprocessor.preprocess_create_x_train_x_plot(
    _x_train,
    test_ds=0.5,
    n_test=200,
)
# The ground truth is evaluated on the unprocessed plotting inputs.
_y_ground_truth = ground_truth_periodic_function(_x_plot)

# The target preprocessor is fitted on the training targets and then reused
# to bring the ground truth onto the same scale.
y_preprocessor = StandardPreprocessor()
y_train = y_preprocessor.fit_transform(_y_train)
y_ground_truth = y_preprocessor.transform(_y_ground_truth)

# %% markdown
# If we simply use two neurons in the output layer, we can model heteroscedastic noise

# %%
input_shape = [1]
layer_units = [20, 10, 2]
# "relu" for every layer except the last, which is "linear".
layer_activations = ["relu" for _ in layer_units[:-1]] + ["linear"]