def _run_pre_iteration_tasks(self, transformed_hyperparams):
    """Do the bookkeeping that precedes training on a newly acquired point.

    In order, this records and prints the current untransformed hypervolume,
    runs the check on the previously acquired point, remembers
    ``transformed_hyperparams`` for the next call, prints the predicted
    privacy/utility of the new point, plots and saves the current state, and
    finally stores the new point under the incremented round number.

    Args:
        transformed_hyperparams: The newly acquired point, expressed in the
            model's (transformed) input space.

    Returns:
        The output of ``_convert_model_input_to_problem_instance_input`` for
        the new point (untransformed; a dictionary of hyperparameter names
        to values).
    """
    # Log the hypervolume reached by the previous run and keep it in the history.
    current_hypervolume = self.get_untransformed_hypervolume()
    self._saved_hypervolumes.append(current_hypervolume)
    print("Hypervolume is: {}".format(current_hypervolume))

    # The check on the last acquired point must run before
    # ``_previous_hyperparams`` is replaced with the new point.
    self._estimate_last_point()
    self._previous_hyperparams = transformed_hyperparams

    # Untransformed view of the point, in the problem instance's input format.
    problem_input = self._convert_model_input_to_problem_instance_input(
        transformed_hyperparams)

    # Predicted privacy and utility, computed before any model is trained.
    privacy_estimate, utility_estimate = self.predict(
        transformed_hyperparams, transform_hyperparams=False)
    estimate_pair = (privacy_estimate, utility_estimate)
    print("curr point: {}".format(problem_input))
    print("curr pts estimated (eps, err): {}".format(estimate_pair))

    # Plot/save first: it labels its output with the round number as it was
    # *before* the increment below.
    self._plot_and_save()
    self._optimization_point_number += 1

    target_dir = '{}/optimization-hyperparameters'.format(self._results_dir)
    round_label = 'round-{}'.format(self._optimization_point_number)
    save_object(target_dir, problem_input, round_label)
    return problem_input
def convert_from_pydiffpriv_to_autodp(num_training_examples,
                                      results_directory,
                                      results_name='full_results'):
    """Recompute the privacy values of stored results with autodp.

    Reads ``<results_directory>/<results_name>.pkl``, which is expected to
    hold an iterable of ``(hyperparams, privacy, utility)`` triples. Every
    triple whose privacy value is greater than zero gets that value replaced
    by ``compute_epsilon(hyperparams, num_training_examples)``; other triples
    are kept as they are. The original ``.pkl`` and ``.txt`` files are renamed
    with a ``-pydiffpriv`` suffix unless a ``-pydiffpriv.pkl`` file is already
    present, and the converted results are then written via ``save_object``
    under the original name.

    Args:
        num_training_examples: Forwarded to ``compute_epsilon``.
        results_directory: Directory that holds the results files.
        results_name: Base name (no extension) of the results files.
    """
    base_path = '{}/{}'.format(results_directory, results_name)
    pkl_path = '{}.pkl'.format(base_path)
    txt_path = '{}.txt'.format(base_path)
    backup_pkl_path = '{}-pydiffpriv.pkl'.format(base_path)
    backup_txt_path = '{}-pydiffpriv.txt'.format(base_path)

    # NOTE: unpickling can execute arbitrary code; only point this at result
    # files that come from a trusted source.
    with open(pkl_path, 'rb') as results_file:
        stored_results = pickle.load(results_file)

    # Same triple layout as the input; only positive privacy values are
    # recomputed, anything else is carried over untouched.
    converted_results = [
        (
            hyperparams,
            compute_epsilon(hyperparams, num_training_examples)
            if old_privacy > 0 else old_privacy,
            utility,
        )
        for hyperparams, old_privacy, utility in stored_results
    ]

    # Keep the first backup ever made: an existing '-pydiffpriv' pickle is
    # never overwritten.
    if os.path.isfile(backup_pkl_path):
        print('Old pydiffpriv files already exist. Continuing anyways.')
    else:
        os.rename(pkl_path, backup_pkl_path)
        os.rename(txt_path, backup_txt_path)

    save_object(results_directory, converted_results, results_name)
def _generate_random_hyperparameter_instances(self):
    """Sample random hyperparameter sets and store them.

    Draws ``self._num_instances`` hyperparameter sets, one value per entry of
    ``self._hyperparam_distributions`` via ``self._random_sample``. Each set
    is placed ``self._num_replications`` times, consecutively, into
    ``self._hyperparam_instances``. The resulting list is passed to
    ``save_object`` under the name ``'hyperparams'``.
    """
    print("Generating random hyperparameters...", end='')
    self._hyperparam_instances = []

    for _ in range(self._num_instances):
        # One random draw per named distribution.
        sampled = {
            name: self._random_sample(distribution)
            for name, distribution in self._hyperparam_distributions.items()
        }
        # Note: all replications of a draw refer to the very same dict
        # object, not to independent copies.
        self._hyperparam_instances.extend(
            sampled for _ in range(self._num_replications))

    save_object(self._results_dir, self._hyperparam_instances, 'hyperparams')
    print("done. Generated and saved.")
def _plot_and_save(self):
    """Plot the empirical Pareto frontier and save this object.

    The plot is suffixed with the current ``_optimization_point_number``.
    ``self`` is then handed to ``save_object`` (with ``save_text=False``)
    under ``<results_dir>/saved_optimization_state`` as ``round-<number>``.
    """
    plot_suffix = str(self._optimization_point_number)
    self._plot_empirical_pareto_frontier(suffix=plot_suffix)

    # A separate dump of the privacy/utility GP models into
    # '<results_dir>/saved_gp_models' used to live here and is disabled;
    # only the whole object is saved.
    state_dir = '{}/saved_optimization_state'.format(self._results_dir)
    round_label = 'round-{}'.format(self._optimization_point_number)
    save_object(state_dir, self, round_label, save_text=False)
def _create_and_save_plots(self, rounds, eps_sequence, loss_sequence,
                           final_eps, test_accuracy):
    """Save the final results of a run together with its diagnostic plots.

    An output directory is created whose last path component is the absolute
    value of a hash over the current time and the hyperparameters. It lives
    under ``self._output_base_dir`` when that is set, otherwise under
    ``results/instance-results/<self._name>``. The tuple
    ``(self._hyperparams, final_eps, test_accuracy)`` is passed to
    ``save_object`` as ``'final_results'``; then ``privacy-results.png``
    (only if ``eps_sequence`` is non-empty) and ``loss-results.png`` are
    written into the same directory.

    Args:
        rounds: Number of rounds; ``range(rounds)`` is the x axis of the
            privacy plot.
        eps_sequence: Epsilon values plotted against the rounds.
        loss_sequence: Loss values, plotted raw and as a moving average.
        final_eps: Final epsilon, stored with the results.
        test_accuracy: Test accuracy, stored with the results.
    """
    # Establish the output directory.
    run_key = (time.time(), frozenset(self._hyperparams.items()))
    run_id = np.abs(hash(run_key))
    if self._output_base_dir:
        out_dir = '{}/instance-results/{}'.format(
            self._output_base_dir, run_id)
    else:
        out_dir = 'results/instance-results/{}/{}'.format(
            self._name, run_id)

    import os
    os.makedirs(out_dir, exist_ok=True)

    # Hyperparameters and results are stored next to the plots.
    save_object(out_dir,
                (self._hyperparams, final_eps, test_accuracy),
                'final_results')

    # Imported here because of a matplotlib + multiprocessing compatibility
    # issue.
    import matplotlib.pyplot as plt

    # Privacy plot: skipped when no privacy was accumulated.
    if len(eps_sequence) > 0:
        plt.figure(num=1, figsize=(12, 8), dpi=80,
                   facecolor='w', edgecolor='k')
        plt.grid(True, which='both')
        round_axis = range(rounds)
        plt.plot(round_axis, eps_sequence)
        plt.xlabel('rounds', fontsize=14)
        plt.ylabel('eps', fontsize=14)
        plt.title('Overall (eps,delta)-DP over composition.')
        plt.savefig('{}/privacy-results.png'.format(out_dir))
        plt.close()

    # Loss plot: raw losses plus a moving average drawn in red.
    num_losses = len(loss_sequence)
    plt.figure(num=None, figsize=(8, 6))
    plt.plot(range(num_losses), loss_sequence)
    ma_window = 50
    # The smoothed curve's x positions start at index ma_window - 1.
    smoothed_axis = range(ma_window - 1, num_losses)
    smoothed_loss = self.moving_average(loss_sequence, n=ma_window)
    plt.plot(smoothed_axis, smoothed_loss, color='red')
    plt.grid(True, which='both')
    plt.xlabel('\"rounds\"', fontsize=14)
    plt.ylabel('average loss', fontsize=14)
    plt.draw()
    plt.savefig('{}/loss-results.png'.format(out_dir))
    plt.close()
def _store_results(self):
    """Pair every hyperparameter instance with its results and save them.

    Builds a list of ``(hyperparam_instance, result[0], result[1])`` tuples,
    where ``result`` is the entry of ``self._results`` at the same index as
    the instance, and passes the list to ``save_object`` under the name
    ``'full_results'``.
    """
    full_results = []
    for position, instance in enumerate(self._hyperparam_instances):
        # Results are looked up by the instance's index.
        outcome = self._results[position]
        full_results.append((instance, outcome[0], outcome[1]))
    save_object(self._results_dir, full_results, 'full_results')