def __init__(self, num_objectives, seed, utility_function=None, user_std=0.1,
             temp_linear_prior=False, add_virtual_comp=False,
             add_virt_comp_global=False, keep_set_small=False,
             thresh_dist=0.001):
    """Set up the random state, the utility function, the dataset and the GP.

    Args:
        num_objectives: forwarded to UserPreference, DatasetPairwise and
            GPPairwise.
        seed: seeds the internal RandomState; also forwarded to
            UserPreference and GPPairwise.
        utility_function: optional callable taking a single vector. When it
            is None, the ``get_preference`` method of a freshly created
            UserPreference is used instead.
        user_std: standard deviation of the normal noise added to a supplied
            utility function; also passed to the GP as ``std_noise``.
        temp_linear_prior, add_virtual_comp, add_virt_comp_global,
        keep_set_small, thresh_dist: stored unchanged as attributes.
    """
    self.random_state = np.random.RandomState(seed)

    if utility_function is not None:
        def noisy_utility(x, add_noise):
            # the noise sample is always drawn (which advances the random
            # state); it is merely multiplied by 0 when add_noise is falsy
            base = utility_function(x)
            scale = int(add_noise)
            return base + scale * self.random_state.normal(0, user_std)

        self.utility_function = noisy_utility
    else:
        user = UserPreference(num_objectives, user_std, seed)
        self.utility_function = user.get_preference

    # comparison data and the GP model that is fitted to it
    self.dataset = DatasetPairwise(num_objectives)
    self.gp = GPPairwise(num_objectives, kernel_width=0.45,
                         std_noise=user_std, seed=seed)

    # plain configuration flags
    self.temp_linear_prior = temp_linear_prior
    self.add_virtual_comp = add_virtual_comp
    self.add_virt_comp_global = add_virt_comp_global
    self.keep_set_small = keep_set_small
    self.thresh_dist = thresh_dist

    # bounds for the global virtual comparisons, unknown at start
    self.min_point = None
    self.max_point = None
def continue_pairwise(username, side_clicked):
    """Build the next pairwise job query for a user, or end the experiment.

    Args:
        username: identifies the user whose 'pairwise' dataset is loaded.
        side_clicked: when equal to the string "1", the best job so far is
            shown as ``job1`` and the newly picked job as ``job2``; for any
            other value the two are swapped.

    Returns:
        The rendered "query_pairwise_jobs.html" template while the elapsed
        time is below ``specs_jobs.TIME_EXPERIMENT_SEC``; otherwise a
        redirect to ``start_experiment/<username>`` after the experiment
        status has been updated.
    """
    # load what this user has answered so far
    user_data = utils_users.get_gp_dataset(
        username, 'pairwise', num_objectives=specs_jobs.NUM_OBJECTIVES)

    # acquirer (picks new datapoints) and GP model
    acquirer = DiscreteAcquirer(input_domain=utils_jobs.get_jobs(),
                                query_type='pairwise',
                                seed=specs_jobs.SEED)
    gp = GPPairwise(num_objectives=specs_jobs.NUM_OBJECTIVES,
                    seed=specs_jobs.SEED)

    # feed the collected data to both of them
    acquirer.history = user_data.datapoints
    gp.update(user_data)

    # first column of the most recent comparison row indexes the best job
    best_idx = user_data.comparisons[-1, 0]
    best_job = user_data.datapoints[best_idx]

    # candidate to compare against
    new_job = acquirer.get_next_point(gp, user_data)

    # arrange the pair according to what the user clicked last round
    job1, job2 = (best_job, new_job) if side_clicked == "1" else (new_job, best_job)

    # convert both jobs into dictionaries for the template
    job1 = utils_jobs.job_array_to_job_dict(job1)
    job2 = utils_jobs.job_array_to_job_dict(job2)

    # keep querying while the time budget has not been used up
    started_at = utils_users.get_experiment_start_time(username, 'pairwise')
    if time.time() - started_at < specs_jobs.TIME_EXPERIMENT_SEC:
        return render_template("query_pairwise_jobs.html",
                               username=username,
                               job1=job1,
                               job2=job2,
                               side_clicked=side_clicked)

    # time is up: mark the experiment as done and go back
    utils_users.update_experiment_status(username=username,
                                         query_type='pairwise')
    return redirect('start_experiment/{}'.format(username))
def initialise_gaussian_process(self):
    """Create a GPPairwise model from the settings in ``self.params``.

    The 'gp prior mean' setting is passed through as ``prior_mean_type``,
    except that the value 'linear-zero' is translated to 'linear'.

    Returns:
        The newly constructed GPPairwise instance.
    """
    params = self.params

    # 'linear-zero' is handed to the GP as a plain linear prior
    mean_type = params['gp prior mean']
    if mean_type == 'linear-zero':
        mean_type = 'linear'

    return GPPairwise(
        num_objectives=params['num objectives'],
        std_noise=params["gp noise hyperparameter"],
        kernel_width=params["gp kernel hyperparameter"],
        prior_mean_type=mean_type,
        seed=params['seed'],
    )
def start(username):
    """Prepare the traffic ranking page for ``username``.

    If the user's dataset has no comparisons yet, the stored datapoints are
    reset and the acquirer's two start points are shown as unranked items.
    Otherwise a GP is fitted to the dataset (with virtual comparisons
    against the traffic min/max points added first), the acquirer picks one
    new item to rank, and the stored ranking is shown next to it.

    Returns:
        The rendered "query_ranking_traffic.html" template. The item arrays
        are multiplied by -1 before being handed to the template.
    """
    # called for this user's start time; the returned value is not used here
    utils_users.get_experiment_start_time(username, 'ranking')

    # data collected for this user so far
    dataset = utils_users.get_gp_dataset(
        username, 'ranking', num_objectives=specs_traffic.NUM_OBJECTIVES)

    # acquirer over all available traffic results
    acquirer = DiscreteAcquirer(
        input_domain=data_traffic.get_traffic_results(),
        query_type='clustering',
        seed=specs_traffic.SEED)

    if dataset.comparisons.shape[0] != 0:
        # --- comparisons exist: show the ranking plus one new item ---
        acquirer.history = dataset.datapoints

        # virtual comparisons for (at most) the first six datapoints
        num_virtual = min(dataset.datapoints.shape[0], 6)
        for idx in range(num_virtual):
            dataset.add_single_comparison(dataset.datapoints[idx],
                                          data_traffic.get_traffic_min())
            dataset.add_single_comparison(data_traffic.get_traffic_max(),
                                          dataset.datapoints[idx])

        # linear prior mean only during the first few queries
        prior_mean_type = 'linear' if acquirer.history.shape[0] < 6 else 'zero'
        print("comparisons after adding stuff", dataset.comparisons)

        # fit the GP to everything collected so far
        gp = GPPairwise(num_objectives=specs_traffic.NUM_OBJECTIVES,
                        seed=specs_traffic.SEED,
                        prior_mean_type=prior_mean_type)
        gp.update(dataset)

        # let the acquirer choose the next item, store it and save
        # NOTE(review): the dataset saved here still contains the virtual
        # comparisons added above - confirm that persisting them is intended
        new_item = acquirer.get_next_point(gp, dataset)
        new_item_id = dataset._add_single_datapoint(new_item)
        utils_users.update_gp_dataset(username, dataset, 'ranking')

        traffic_unranked = [new_item]
        IDs_unranked = [new_item_id]

        # ranking so far and the datapoints it refers to
        IDs_ranked = utils_users.get_ranking(username)
        traffic_ranked = dataset.datapoints[IDs_ranked]
    else:
        # --- nothing compared yet: only display two starting items ---
        # drop leftover datapoints (in case an experiment was aborted)
        dataset.datapoints = np.empty((0, specs_traffic.NUM_OBJECTIVES))

        first_item, second_item = acquirer.get_start_points()
        first_id = dataset._add_single_datapoint(first_item)
        second_id = dataset._add_single_datapoint(second_item)
        utils_users.update_gp_dataset(username, dataset, 'ranking')

        traffic_unranked = [first_item, second_item]
        IDs_unranked = [first_id, second_id]
        traffic_ranked = []
        IDs_ranked = []

    # names of the objectives for display
    obj_names = data_traffic.get_objective_names()
    obj_abbrev = data_traffic.get_objective_abbrev()

    return render_template("query_ranking_traffic.html",
                           username=username,
                           traffic_unranked=-1*np.array(traffic_unranked),
                           traffic_ranked=-1*np.array(traffic_ranked),
                           IDs_ranked=IDs_ranked,
                           IDs_unranked=IDs_unranked,
                           obj_names=obj_names,
                           obj_abbrev=obj_abbrev)
def continue_pairwise(username, side_clicked, button_type):
    """Handle a click in the pairwise traffic experiment.

    Args:
        username: identifies the user whose 'pairwise' dataset is loaded.
        side_clicked: when equal to the string "1", the best item so far is
            shown as ``item1`` and the newly picked one as ``item2``; for
            any other value the two are swapped.
        button_type: 'next' renders the next query; 'end' stores the end
            time, updates the experiment status and redirects.

    Returns:
        The rendered "query_pairwise_traffic.html" template (items are
        negated before being passed) or a redirect to
        ``start_experiment/<username>``.

    Raises:
        NotImplementedError: if ``button_type`` is neither 'next' nor 'end'.
    """
    # data collected for this user so far
    user_data = utils_users.get_gp_dataset(
        username, 'pairwise', num_objectives=specs_traffic.NUM_OBJECTIVES)

    # acquirer which picks new datapoints
    acquirer = DiscreteAcquirer(
        input_domain=data_traffic.get_traffic_results(),
        query_type='pairwise',
        seed=specs_traffic.SEED)
    acquirer.history = user_data.datapoints

    # virtual comparisons for (at most) the first six datapoints
    num_virtual = min(user_data.datapoints.shape[0], 6)
    for idx in range(num_virtual):
        user_data.add_single_comparison(user_data.datapoints[idx],
                                        data_traffic.get_traffic_min())
        user_data.add_single_comparison(data_traffic.get_traffic_max(),
                                        user_data.datapoints[idx])

    # linear prior mean only during the first few queries
    prior_mean_type = 'linear' if acquirer.history.shape[0] < 6 else 'zero'

    # fit the GP to the collected data
    gp = GPPairwise(num_objectives=specs_traffic.NUM_OBJECTIVES,
                    seed=specs_traffic.SEED,
                    prior_mean_type=prior_mean_type)
    gp.update(user_data)

    # first column of the last comparison row indexes the "best so far"
    # NOTE(review): this is read after the virtual comparisons above were
    # added; if add_single_comparison appends rows, the index refers to a
    # virtual comparison rather than the user's last choice - confirm
    best_idx = user_data.comparisons[-1, 0]
    best_item = user_data.datapoints[best_idx]

    # candidate to compare against
    new_item = acquirer.get_next_point(gp, user_data)

    # arrange the pair according to what the user clicked last round
    item1, item2 = (best_item, new_item) if side_clicked == "1" else (new_item, best_item)

    # names of the objectives for display
    obj_names = data_traffic.get_objective_names()
    obj_abbrev = data_traffic.get_objective_abbrev()

    if button_type == 'next':
        return render_template("query_pairwise_traffic.html",
                               username=username,
                               item1=-item1,
                               item2=-item2,
                               side_clicked=side_clicked,
                               obj_names=obj_names,
                               obj_abbrev=obj_abbrev)

    if button_type == 'end':
        # store the end time and register that this experiment was done
        utils_users.save_experiment_end_time(username, 'pairwise')
        utils_users.update_experiment_status(username=username,
                                             query_type='pairwise')
        return redirect('start_experiment/{}'.format(username))

    raise NotImplementedError('Button type unknown.')
def start(username):
    """Prepare the job ranking page for ``username``.

    If the user's dataset has no comparisons yet, the stored datapoints are
    reset and two start jobs are shown as unranked. Otherwise a GP is
    fitted to the dataset, the acquirer picks one new job to rank, and the
    stored ranking is shown next to it. Every job dictionary handed to the
    template carries an 'ID' entry holding its index in the dataset.

    Returns:
        The rendered "query_ranking_jobs.html" template.
    """
    # called for this user's start time; the returned value is not used here
    utils_users.get_experiment_start_time(username, 'ranking')

    # data collected for this user so far
    dataset = utils_users.get_gp_dataset(
        username, 'ranking', num_objectives=specs_jobs.NUM_OBJECTIVES)

    if dataset.comparisons.shape[0] != 0:
        # --- comparisons exist: show the ranking plus one new job ---
        gp = GPPairwise(num_objectives=specs_jobs.NUM_OBJECTIVES,
                        seed=specs_jobs.SEED)
        acquirer = DiscreteAcquirer(input_domain=utils_jobs.get_jobs(),
                                    query_type='clustering',
                                    seed=specs_jobs.SEED)

        # feed the collected data to acquirer and GP
        acquirer.history = dataset.datapoints
        gp.update(dataset)

        # pick the next job, store it in the dataset and save
        candidate = acquirer.get_next_point(gp, dataset)
        candidate_idx = dataset._add_single_datapoint(candidate)
        utils_users.update_gp_dataset(username, dataset, 'ranking')

        # displayable dictionary, tagged with its dataset index
        candidate = utils_jobs.job_array_to_job_dict(candidate)
        candidate['ID'] = candidate_idx
        jobs_unranked = [candidate]

        # ranking so far, converted to dictionaries and tagged with IDs
        ranking = utils_users.get_ranking(username)
        jobs_ranked = [
            utils_jobs.job_array_to_job_dict(row)
            for row in dataset.datapoints[ranking]
        ]
        for position, job_id in enumerate(ranking):
            jobs_ranked[position]['ID'] = job_id
    else:
        # --- nothing compared yet: only display two starting jobs ---
        # drop leftover datapoints (in case an experiment was aborted)
        dataset.datapoints = np.empty((0, specs_jobs.NUM_OBJECTIVES))

        first_job, second_job = utils_jobs.get_next_start_jobs(username)
        first_idx = dataset._add_single_datapoint(first_job)
        second_idx = dataset._add_single_datapoint(second_job)
        utils_users.update_gp_dataset(username, dataset, 'ranking')

        # displayable dictionaries, tagged with their dataset indices
        first_job = utils_jobs.job_array_to_job_dict(first_job)
        second_job = utils_jobs.job_array_to_job_dict(second_job)
        first_job['ID'] = first_idx
        second_job['ID'] = second_idx

        jobs_unranked = [first_job, second_job]
        jobs_ranked = []

    return render_template("query_ranking_jobs.html",
                           username=username,
                           jobs_unranked=jobs_unranked,
                           jobs_ranked=jobs_ranked)
class DecisionMaker:
    """Answers pairwise comparisons with a (noisy) utility and models them.

    Every comparison made through ``noisy_compare`` is stored in a
    DatasetPairwise, and a GPPairwise model is updated from that dataset.
    Optionally, virtual comparisons against the component-wise max/min of
    the points passed to ``sample`` are added before sampling from the GP.
    """

    def __init__(self, num_objectives, seed, utility_function=None,
                 user_std=0.1, temp_linear_prior=False,
                 add_virtual_comp=False, add_virt_comp_global=False,
                 keep_set_small=False, thresh_dist=0.001):
        """Set up the utility function, the dataset and the GP.

        Args:
            num_objectives: forwarded to UserPreference, DatasetPairwise and
                GPPairwise.
            seed: seeds the internal RandomState; also forwarded to
                UserPreference and GPPairwise.
            utility_function: optional callable taking a single vector. When
                None, ``UserPreference(...).get_preference`` is used.
            user_std: standard deviation of the normal noise added to a
                supplied utility function; also the GP's ``std_noise``.
            temp_linear_prior: if True, ``set_prior`` uses a 'linear' prior
                mean while there are fewer than 10 comparisons and 'zero'
                afterwards.
            add_virtual_comp: if True, ``sample`` first calls
                ``virtual_comp``.
            add_virt_comp_global: if True, ``sample`` first calls
                ``virtual_comp_global``.
            keep_set_small: if True, ``noisy_compare`` replaces a vector by
                an already stored datapoint that is closer than
                ``thresh_dist``.
            thresh_dist: Euclidean distance threshold used for that
                replacement.
        """
        self.random_state = np.random.RandomState(seed)
        if utility_function is None:
            user = UserPreference(num_objectives, user_std, seed)
            self.utility_function = user.get_preference
        else:
            # the noise sample is always drawn (advancing the random state)
            # and only multiplied by 0 when add_noise is falsy
            self.utility_function = lambda x, add_noise: utility_function(
                x) + int(add_noise) * self.random_state.normal(0, user_std)
        self.dataset = DatasetPairwise(num_objectives)
        self.gp = GPPairwise(num_objectives, kernel_width=0.45,
                             std_noise=user_std, seed=seed)
        self.temp_linear_prior = temp_linear_prior
        self.add_virtual_comp = add_virtual_comp
        # running bounds for virtual_comp_global, unknown at start
        self.min_point = None
        self.max_point = None
        self.add_virt_comp_global = add_virt_comp_global
        self.keep_set_small = keep_set_small
        self.thresh_dist = thresh_dist

    def true_utility(self, vect):
        """Return the utility of ``vect`` with ``add_noise=False``."""
        return self.utility_function(vect, add_noise=False)

    def set_prior(self):
        """Select the GP prior mean type when ``temp_linear_prior`` is set.

        Uses 'linear' while the dataset holds fewer than 10 comparisons and
        'zero' otherwise. Does nothing if ``temp_linear_prior`` is falsy.
        """
        if self.temp_linear_prior:
            if self.dataset.comparisons.shape[0] < 10:
                self.gp.prior_mean_type = 'linear'
            else:
                self.gp.prior_mean_type = 'zero'

    def _snap_to_dataset(self, vect):
        """Return the stored datapoint closest to ``vect`` if it is nearer
        than ``thresh_dist`` (Euclidean distance), otherwise ``vect``."""
        dist = np.linalg.norm(self.dataset.datapoints - vect, axis=1)
        nearest = np.argmin(dist)
        if dist[nearest] < self.thresh_dist:
            return self.dataset.datapoints[nearest]
        return vect

    def noisy_compare(self, vect1, vect2, dont_update=False):
        """Compare two vectors using the noisy utility and record the result.

        Args:
            vect1: first vector.
            vect2: second vector.
            dont_update: if True, the GP is not updated after the comparison
                has been added to the dataset.

        Returns:
            Whether the noisy utility of ``vect1`` is greater than that of
            ``vect2``.
        """
        if self.keep_set_small and self.dataset.datapoints.shape[0] > 0:
            # each vector is matched against the dataset on its own; the
            # former code looked up vect2 with the distances computed for
            # vect1 and so could replace it by the wrong datapoint
            vect1 = self._snap_to_dataset(vect1)
            vect2 = self._snap_to_dataset(vect2)

        utl1 = self.utility_function(vect1, add_noise=True)
        utl2 = self.utility_function(vect2, add_noise=True)

        # the first argument of add_single_comparison is the preferred one
        if utl1 > utl2:
            self.dataset.add_single_comparison(vect1, vect2)
        else:
            self.dataset.add_single_comparison(vect2, vect1)

        if not dont_update:
            self.update_gp(self.dataset)

        return utl1 > utl2

    def sample(self, sample_points):
        """Return a sample of the GP utility at ``sample_points``.

        If requested at construction time, virtual comparisons are added
        (and the GP updated) before sampling.
        """
        # if requested, add virtual comparisons to nadir and utopian point
        if self.add_virtual_comp:
            self.virtual_comp(sample_points)
        if self.add_virt_comp_global:
            self.virtual_comp_global(sample_points)
        return self.gp.sample(sample_points)

    def update_gp(self, dataset):
        """Apply ``set_prior`` and update the GP with ``dataset``."""
        self.set_prior()
        self.gp.update(dataset)

    def virtual_comp(self, sample_points):
        """Update the GP with virtual comparisons local to ``sample_points``.

        The utopian (component-wise max) and nadir (component-wise min)
        points of ``sample_points`` are computed; on a copy of the dataset,
        every sample point is recorded as worse than the utopian and better
        than the nadir point, and the GP is updated from that copy. The
        stored dataset itself is left untouched.
        """
        # put list of PF vectors into matrix
        sample_points = np.vstack(sample_points)

        if sample_points.shape[0] == 1:
            # a single point gives no useful utopian/nadir pair; make sure
            # the real data is used (virtual comparisons might have been
            # added last round)
            if self.dataset.datapoints.shape[0] > 0:
                self.update_gp(self.dataset)
        else:
            utopian_point = np.max(sample_points, axis=0)
            nadir_point = np.min(sample_points, axis=0)

            # work on a copy so virtual comparisons never enter the dataset
            dataset_copy = copy.deepcopy(self.dataset)
            for vect in sample_points:
                dataset_copy.add_single_comparison(utopian_point, vect)
                dataset_copy.add_single_comparison(vect, nadir_point)

            # update the GP using this dataset
            # (note: the GP forgets everything it knew before)
            self.update_gp(dataset_copy)

    def virtual_comp_global(self, sample_points):
        """Update the GP with virtual comparisons against running bounds.

        ``self.max_point`` / ``self.min_point`` hold the component-wise
        max / min over all sample points passed to this method so far. On a
        copy of the dataset, each sample point that differs from a bound is
        compared against it; the GP is updated from the copy if it contains
        any comparison.
        """
        # put list of PF vectors into matrix
        sample_points = np.vstack(sample_points)

        # extend the running bounds with the new points
        if self.max_point is None:
            self.max_point = np.max(sample_points, axis=0)
        else:
            stacked_points = np.vstack((sample_points, self.max_point))
            self.max_point = np.max(stacked_points, axis=0)
        if self.min_point is None:
            self.min_point = np.min(sample_points, axis=0)
        else:
            stacked_points = np.vstack((sample_points, self.min_point))
            self.min_point = np.min(stacked_points, axis=0)

        # work on a copy so virtual comparisons never enter the dataset
        dataset_copy = copy.deepcopy(self.dataset)
        for vect in sample_points:
            # skip comparing a point against itself
            if np.sum(np.abs(vect - self.max_point)) != 0:
                dataset_copy.add_single_comparison(self.max_point, vect)
            if np.sum(np.abs(vect - self.min_point)) != 0:
                dataset_copy.add_single_comparison(vect, self.min_point)

        # update the GP using this dataset
        # (note: the GP forgets everything it knew before)
        if dataset_copy.comparisons.shape[0] > 0:
            self.update_gp(dataset_copy)