# and obtain all possible pairwise preferences between these points.
for it in range(num_iterations):

    # Print status:
    print('Run %i of %i, iteration %i of %i' % (i + 1, len(run_nums),
                                                it + 1, num_iterations))

    # Preference data observed so far (used to train GP preference model).
    # Rows of X are index pairs into the sample grid; y holds the winning
    # column of each labeled pair.
    X = data_pt_idxs[:pref_count, :]
    y = labels[:pref_count, 1]

    # Update the Gaussian process preference model with all data so far:
    posterior_model = feedback(X, y, GP_prior_cov_inv, preference_noise)

    # Sample new points at which to query for a preference:
    sampled_point_idxs, _ = advance(posterior_model, num_samples)

    # Obtain coordinate points corresponding to these indices, and
    # store the objective function values:
    sampled_points = np.empty((num_samples, state_dim))

    for j in range(num_samples):

        sampled_point_idx = sampled_point_idxs[j]

        # Coordinate point representation:
        sampled_points[j, :] = points_to_sample[sampled_point_idx, :]

        # Flat index of this sample across all iterations:
        sample_idx = it * num_samples + j

        # Objective function value:
        objective_values[sample_idx] = get_objective_value(sampled_point_idx,
                                                           objective_function)
# Construct posterior covariance matrix from its eigendecomposition.
# NOTE(review): the variable is named prior_cov although the comment calls it
# the posterior — confirm which is intended against the surrounding file.
# If cov_evecs is orthonormal, inv(cov_evecs) equals cov_evecs.T — verify
# before simplifying.
prior_cov = cov_evecs @ np.diag(cov_evals) @ np.linalg.inv(cov_evecs)

# Posterior standard deviation at each point (diagonal of the covariance):
GP_stdev = np.sqrt(np.diag(prior_cov))

# Check whether we have already drawn some posterior samples for this
# number of preferences; if so, reuse the cached .mat file.
post_sample_filename = ('Plotting_data/Samples_from_reward_model_'
                        + str(pref_num) + '_pref.mat')

if not os.path.isfile(post_sample_filename):

    # Draw a number of samples from the posterior model:
    trial_samples, reward_samples = advance(GP_model, 50)

    # Save samples and reward models:
    io.savemat(post_sample_filename, {'samples': trial_samples,
                                      'reward_models': reward_samples})

# Load and unpack saved reward information:
data = io.loadmat(post_sample_filename)
trial_samples = data['samples'].flatten()
reward_samples = data['reward_models']

# Actions sampled in this simulation iteration:
samples = data_pt_idxs[pref_num, :]
# and obtain all possible pairwise preferences between these points.
# NOTE(review): this chunk is visibly truncated — temp_obj_values is created
# but its fill logic lies beyond the visible source; the inner loop continues
# past this point.
for it in range(num_iterations):

    # Print status:
    print('Run %i of %i, iteration %i of %i' % (i + 1, len(run_nums),
                                                it + 1, num_iterations))

    # Preference data observed so far (used to train GP preference model):
    X = data_pt_idxs[:pref_count, :]
    y = labels[:pref_count, 1]

    # Update the Gaussian process preference model:
    posterior_model = feedback(X, y, GP_prior_cov_inv, preference_noise)

    # Sample new points at which to query for a preference:
    sampled_point_idxs, reward_models = advance(posterior_model, num_samples)

    # Periodically visualize learning progress (every 5th iteration):
    if ((it + 1) % 5 == 0):
        plot_progress(save_folder, points_to_sample, points_per_dimension,
                      state_dim, posterior_model, it + 1, reward_models,
                      0)  # dimension = 0 for visualization

    # Obtain coordinate points corresponding to these indices, and
    # store the objective function values:
    sampled_points = np.empty((num_samples, state_dim))
    temp_obj_values = []

    for j in range(num_samples):

        sampled_point_idx = sampled_point_idxs[j]

        # Coordinate point representation:
        sampled_points[j, :] = points_to_sample[sampled_point_idx]