def benchmark_param_dicts(params_dict_list, **kwargs):
    '''
    Pass this a list of dicts, where each has the different parameters you
    want to benchmark. It then iterates through the list, doing a benchmark
    for each dict.
    '''

    # Create dir for the results of this benchmark if one isn't provided.
    benchmark_dir = kwargs.get('benchmark_dir', None)
    if benchmark_dir is None:
        benchmark_dir = os.path.join(
            path_utils.get_output_dir(),
            'Benchmark_{}'.format(path_utils.get_date_str()))
        os.mkdir(benchmark_dir)

    for d in params_dict_list:

        # If a run_fname_label is provided, use that to create a more
        # informative dir name. Otherwise, just use the date.
        if 'run_fname_label' in d.keys():
            run_fname_label = d['run_fname_label']
        else:
            run_fname_label = 'vary_params'

        # Base dir for this specific benchmark
        params_dir = os.path.join(
            benchmark_dir,
            '{}_{}'.format(run_fname_label, path_utils.get_date_str()))
        os.mkdir(params_dir)

        # To hold the actual runs (FF)
        runs_dir = os.path.join(params_dir, 'runs')
        os.mkdir(runs_dir)

        print('\n\nNow benchmarking params:')
        pp.pprint(d, width=1)
        print('\n\n')

        benchmark_dict = benchmark_param_dict(
            d, kwargs.get('N_dist', 10), kwargs.get('N_gen', 100), runs_dir)

        # Add results to the dict that was passed in.
        d['benchmark_dict'] = deepcopy(benchmark_dict)

        # Make plots for this benchmark
        if 'run_plot_label' in d.keys():
            run_plot_label = d['run_plot_label']
        else:
            run_plot_label = run_fname_label

        # Plots for benchmark
        fname = os.path.join(params_dir, f'{run_fname_label}_solve_gens_dist.png')
        plot_benchmark_dist(
            run_plot_label, benchmark_dict['solve_gens'], 'Solve generation', fname)

        fname = os.path.join(params_dir, f'{run_fname_label}_best_scores_dist.png')
        plot_benchmark_dist(
            run_plot_label, benchmark_dict['best_scores'], 'Best score', fname)

    # Return the passed list, whose dicts have been modified with the results.
    return params_dict_list
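# A minimal usage sketch for benchmark_param_dicts(), kept as a comment so the
# module stays import-safe. The param dict keys shown (env_name, NN,
# N_hidden_units) are taken from the example in benchmark_vary_params()'s
# docstring and are illustrative; use whatever keys benchmark_param_dict()
# actually consumes in this repo.
#
# param_dicts = [
#     {'env_name': 'CartPole-v0', 'NN': 'FFNN_multilayer',
#      'N_hidden_units': 2, 'run_fname_label': 'cartpole_2HU'},
#     {'env_name': 'CartPole-v0', 'NN': 'FFNN_multilayer',
#      'N_hidden_units': 4, 'run_fname_label': 'cartpole_4HU'},
# ]
# results = benchmark_param_dicts(param_dicts, N_dist=20, N_gen=200)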
def benchmark_envs(env_list, **kwargs):
    '''
    Iterates over a list of env names you give it, benchmarking each one and
    recording info. For each env, it creates a results dir, runs N_dist
    evolutions of N_gen generations, saves the stats to benchmark_stats.json,
    and plots the resulting distributions.
    '''

    N_dist = kwargs.get('N_dist', 10)  # How many evolutions to run, to form a distribution
    N_gen = kwargs.get('N_gen', 1000)

    # Create dir for the results of this benchmark.
    benchmark_dir = os.path.join(
        path_utils.get_output_dir(),
        'Benchmark_{}'.format(path_utils.get_date_str()))
    os.mkdir(benchmark_dir)

    # Dict to hold results on timing, etc.
    benchmark_dict = {}
    for env_name in env_list:

        print(f'\nBenchmarking env {env_name} now...\n')

        # Create a dir for this env.
        env_dir = os.path.join(benchmark_dir, env_name)
        os.mkdir(env_dir)

        env_runs_dir = os.path.join(env_dir, 'runs')
        os.mkdir(env_runs_dir)

        param_dict = deepcopy(kwargs)
        param_dict['env_name'] = env_name

        benchmark_dict[env_name] = benchmark_param_dict(
            param_dict, N_dist, N_gen, env_runs_dir)
        benchmark_dict[env_name]['env_dir'] = env_dir

    # Save distributions to file
    with open(os.path.join(benchmark_dir, 'benchmark_stats.json'), 'w+') as f:
        json.dump(benchmark_dict, f, indent=4)

    # Plot each env dist.
    for k, v in benchmark_dict.items():

        # Make sure the run finished
        if 'solve_gens' in v.keys():

            fname = os.path.join(v['env_dir'], f'{k}_solve_gens_dist.png')
            plot_benchmark_dist(k, v['solve_gens'], 'Solve generation', fname)

            fname = os.path.join(v['env_dir'], f'{k}_best_scores_dist.png')
            plot_benchmark_dist(k, v['best_scores'], 'Best score', fname)

            fname = os.path.join(v['env_dir'], f'{k}_all_scores_dist.png')
            plot_benchmark_dist(
                k, v['all_scores'], 'All scores', fname, N_bins=20, plot_log=True)
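# A usage sketch for benchmark_envs(), commented out so nothing runs on
# import. 'CartPole-v0' appears elsewhere in this repo; the second env name is
# purely illustrative. Any extra kwargs are forwarded into each run's
# param_dict.
#
# benchmark_envs(['CartPole-v0', 'Pendulum-v0'], N_dist=20, N_gen=500)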
def ramp_difficulty(train_class, **kwargs):
    '''
    This should hopefully "ramp up" the difficulty, letting the agent solve
    the easy case first and then adapt to the harder cases.
    '''

    base_dir = os.path.join(
        path_utils.get_output_dir(),
        'ramp_difficulty_{}'.format(path_utils.get_date_str()))
    os.mkdir(base_dir)
    kwargs['base_dir'] = base_dir

    cm = train_class(**kwargs)

    #cat_speed_rels = [3.0, 3.1, 3.2, 3.3, 3.35, 3.4, 3.45, 3.5, 3.55, 3.6, 3.7, 3.8, 3.9, 4.0]
    N_eps_chunk = kwargs.get('N_eps', 5000)
    cat_speed_rels = {
        3.1: 1 * N_eps_chunk,
        3.2: 1 * N_eps_chunk,
        3.3: 1 * N_eps_chunk,
        3.4: 2 * N_eps_chunk,
        3.5: 2 * N_eps_chunk,
        3.6: 2 * N_eps_chunk,
        3.65: 2 * N_eps_chunk,
        3.7: 2 * N_eps_chunk,
        3.75: 2 * N_eps_chunk,
        3.8: 2 * N_eps_chunk,
    }

    c_s_r_fname = os.path.join(cm.dir, 'c_s_r_list.json')
    with open(c_s_r_fname, 'w+') as f:
        json.dump({'cat_speed_rels': cat_speed_rels}, f, indent=4)

    for i, (c_s, N_eps) in enumerate(cat_speed_rels.items()):

        print(f'\nRunning with cat_speed_rel = {c_s} now!\n')

        if kwargs.get('reset_buffer', False):
            cm.ER.reset_buffer()

        cm.agent.set_cat_speed(c_s)

        if i == 0:
            cm.train(N_eps, kwargs.get('N_steps', 500), reset_hist=True)
        else:
            cm.train(N_eps, kwargs.get('N_steps', 500), reset_hist=False)

        cm.train_epochs.append(cm.total_step)

        cm.plot_train(save_plot=True, show_plot=False)
        cm.save_model()
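# A hypothetical call to ramp_difficulty(), shown as a comment. CatMouse_DDPG
# is used as a train_class elsewhere in this repo; the kwarg values here are
# illustrative, not recommended settings, and any kwargs not consumed here are
# passed straight to the train_class constructor.
#
# ramp_difficulty(CatMouse_DDPG, N_eps=5000, N_steps=500, reset_buffer=True,
#                 noise_sigma=0.5)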
def __init__(self, env_name, **kwargs):

    # Create env, create agent
    self.setup_env(env_name)
    self.agent = Agent.Agent(self.env, **kwargs)
    self.noise_sd = 1.0
    self.max_episode_steps = kwargs.get("max_episode_steps", 500)

    # Get the base dir, which is where runs will be saved to. Default
    # is /output/
    base_dir = kwargs.get("base_dir", path_utils.get_output_dir())

    # Datetime string for labeling the run
    self.dt_str = path_utils.get_date_str()

    # If you don't pass anything, it will create a dir in base_dir to
    # hold the results of this run, but you can supply your own externally.
    self.run_dir = kwargs.get("run_dir", None)
    if self.run_dir is None:
        self.run_dir = os.path.join(
            base_dir, f"{self.env_name}_sample_{self.dt_str}")
        os.mkdir(self.run_dir)

    # For saving the parameters used for the run. Run last in __init__().
    if kwargs.get("load_params_from_dir", False):
        self.load_params_dict()
    else:
        self.run_params = kwargs.copy()
        self.save_params_dict()

    #### Plot params
    self.plot_pt_alpha = 0.2
    self.plot_label_params = {"fontsize": 18}
    self.plot_tick_params = {"fontsize": 13}
    self.plot_title_params = {"fontsize": 18}

    self.plot_params = {
        "plot_pt_alpha": self.plot_pt_alpha,
        "plot_label_params": self.plot_label_params,
        "plot_tick_params": self.plot_tick_params,
        "plot_title_params": self.plot_title_params,
    }
def __init__(self, env_name, **kwargs):

    # The search method used. Default is Random Weight Guessing (RWG).
    self.search_method = kwargs.get('search_method', 'RWG')
    assert self.search_method in [
        'RWG',
        'gaussian_noise_hill_climb',
        'grid_search',
        'bin_grid_search',
        'sparse_bin_grid_search'
    ], 'Must supply valid search_method!'

    # Create env, create agent
    self.setup_env(env_name)
    self.agent = Agent.Agent(self.env, **kwargs)
    self.noise_sd = 1.0
    self.max_episode_steps = kwargs.get('max_episode_steps', 500)

    # Get the base dir, which is where runs will be saved to. Default
    # is /output/
    base_dir = kwargs.get('base_dir', path_utils.get_output_dir())

    # Datetime string for labeling the run
    self.dt_str = path_utils.get_date_str()

    # If you don't pass anything, it will create a dir in base_dir to
    # hold the results of this run, but you can supply your own externally.
    self.run_dir = kwargs.get('run_dir', None)
    if self.run_dir is None:
        self.run_dir = os.path.join(base_dir, f'{self.env_name}_evo_{self.dt_str}')
        os.mkdir(self.run_dir)

    # For saving the parameters used for the run. Run last in __init__().
    if kwargs.get('load_params_from_dir', False):
        self.load_params_dict()
    else:
        self.run_params = kwargs.copy()
        self.save_params_dict()

    #### Plot params
    self.plot_pt_alpha = 0.2
    self.plot_label_params = {'fontsize': 14}
    self.plot_tick_params = {'fontsize': 11}
    self.plot_title_params = {'fontsize': 16}
def load_best_agent_sample_from_dir(dir):

    assert os.path.exists(dir), f"Dir must exist to load from! Dir {dir} DNE."

    run_params_json_fname = os.path.join(dir, "run_params.json")
    assert os.path.exists(
        run_params_json_fname
    ), f"run_params.json must exist in dir to load from! {run_params_json_fname} DNE."

    sample_dict_fname = os.path.join(dir, "sample_stats.json")
    assert os.path.exists(
        sample_dict_fname
    ), f"sample_stats.json must exist in dir to load from! {sample_dict_fname} DNE."

    # Get run_params to recreate the object
    with open(run_params_json_fname, "r") as f:
        run_params = json.load(f)

    run_params["run_dir"] = os.path.join(path_utils.get_output_dir(), "tmp")
    del run_params["dt_str"]

    print("\nPassing this dict to create a new Sample():")
    print(run_params)
    print()

    env_name = run_params.pop("env_name")
    s = Sample(env_name, **run_params)

    with open(sample_dict_fname, "r") as f:
        sample_dict = json.load(f)

    print(sample_dict.keys())
    best_weights = sample_dict["best_weights"]
    print("\nLoading best weights:")
    print(best_weights)

    best_weights_mat = [np.array(w) for w in best_weights]
    s.agent.set_weight_matrix(best_weights_mat)

    return s
    'LR': 2 * 10**-4,
    'N_batch': 5,
    'optim': 'Adam',
    'beta_entropy': 10**-5,
    'sigma_min': 10**-5,
    'clamp_grad': 10,
    'cat_speed_rel': 3.2,
    'noise_sigma': 0.5
}

vary_dict = {'cat_speed_rel': [3.3, 3.4], 'LR': [2 * 10**-3]}

rt.hyperparam_search_const(
    aux_functions.run_train_sequence,
    center_dict,
    vary_dict,
    path_utils.get_output_dir(),
    N_runs=5,
    save_R=True,
    center_run=True)

#################################### DDPG difficulty tests

center_dict = {
    'train_class': CatMouse_DDPG,
    'N_eps': 20000,
    'noise_sigma': 0.5,
    'max_ep_steps': 100,
    'N_steps': 100,
    'decay_noise': True,
    'cat_speed_rel': 3.0
}
'''
DDPG implementation.
'''

default_kwargs = {
    'gamma': 0.99,
    'optim': 'Adam',
    'LR_actor': 1 * 10**-4,
    'LR_critic': 1 * 10**-3,
    'clamp_grad': 10000.0,
    'hidden_size': 200,
    'init_weights': True,
    'base_dir': path_utils.get_output_dir(),
    'noise_sigma': 0.2,
    'noise_theta': 0.15,
    'noise_dt': 10**-2,
    'noise_mu': 'zero',
    'noise_decay_limit': 10**-3,
    'max_buffer_size': 10**6,
    'ER_batch_size': 64,
    'tau': 0.001,
    'decay_noise': True,
    'noise_method': 'OU'
}


class CatMouse_DDPG:

    def __init__(self, **kwargs):
import path_utils
import os
import Sample

# Fill in with the dir (inside the output dir) of the run whose best agent
# you want to load.
best_agent_dir = ""

dir = os.path.join(path_utils.get_output_dir(), best_agent_dir)
s = Sample.load_best_agent_sample_from_dir(dir)
s.run_episode(show_ep=True)
    # Append even if there was no improvement
    best_scores.append(best_score)

# Plot results
print(f'Best score achieved: {best_score}')
print(f'Best weight matrix: \n{best_weights}')

plt.plot(best_scores, color='tomato', label='Best FF found')
plt.plot(all_scores, color='dodgerblue', label='All FF')
plt.xlabel('Episode')
plt.ylabel('Fitness Function (FF)')
plt.legend()
plt.title('CartPole-v0 environment')
plt.savefig(os.path.join(path_utils.get_output_dir(), 'NE_cartpole_FF.png'))
plt.show()

#env = gym.wrappers.Monitor(env, 'video', force = True)

# Set to best weights found, run episode and show
net.set_weights(best_weights)
obs = env.reset()
score = 0
done = False
while not done:
    env.render()
    action = net.get_action(obs)
    obs, rew, done, info = env.step(action)
    score += rew

print(f'Final score: {score}')
def benchmark_vary_params(constant_params_dict, vary_params_dict, **kwargs):
    '''
    This is a convenience function to easily vary parameters for benchmarking.
    You pass it constant_params_dict, which is a dict of the values that you
    want to remain constant between runs. Then, pass it vary_params_dict,
    which should map each parameter that you want to vary to a list of the
    values it should take.

    Example:

    constant_params_dict = {
        'env_name': 'CartPole-v0',
        'N_gen': 1000,
        'N_dist': 100,
        'NN': 'FFNN_multilayer'
    }

    vary_params_dict = {
        'N_hidden_units': [2, 4, 8],
        'act_fn': ['tanh', 'relu']
    }

    This will do 3*2 = 6 runs, one for each combination of the varied
    parameters.
    '''

    # Create informative dir name
    vary_params = list(vary_params_dict.keys())
    benchmark_dir = os.path.join(
        path_utils.get_output_dir(),
        'Benchmark_vary_{}_{}'.format('_'.join(vary_params), path_utils.get_date_str()))
    print(f'\nSaving benchmark run to {benchmark_dir}')
    os.mkdir(benchmark_dir)

    combined_params = {**constant_params_dict, **vary_params_dict}

    # Save params to file
    with open(os.path.join(benchmark_dir, 'run_params.json'), 'w+') as f:
        json.dump(combined_params, f, indent=4)

    # Flatten list, pass to other function
    flat_param_list = vary_params_cross_products(constant_params_dict, vary_params_dict)
    flat_param_list = benchmark_param_dicts(
        flat_param_list, benchmark_dir=benchmark_dir, **kwargs)

    # Parse results
    for d in flat_param_list:

        benchmark_dict = d['benchmark_dict']

        best_scores = benchmark_dict['best_scores']
        d['mu_best'] = np.mean(best_scores)
        d['sigma_best'] = np.std(best_scores)

        solve_gens = benchmark_dict['solve_gens']
        d['mu_solve_gens'] = np.mean(solve_gens)
        d['sigma_solve_gens'] = np.std(solve_gens)

        #pp.pprint(d, width=1)
        # Get rid of this now
        d.pop('benchmark_dict')

    # Save results to csv for later parsing/plotting
    df = pd.DataFrame(flat_param_list)
    print(tabulate(df, headers=df.columns.values, tablefmt='psql'))
    df_fname = os.path.join(benchmark_dir, 'vary_benchmark_results.csv')
    df.to_csv(df_fname, index=False)

    # Only needed if at least 2 params were varied.
    if len(vary_params) >= 2:

        # Create heatmap plots dir
        heatmap_dir = os.path.join(benchmark_dir, 'heatmap_plots')
        print(f'\nSaving heatmap plots to {heatmap_dir}')
        os.mkdir(heatmap_dir)

        # Iterate over all unique pairs of vary params, plot heatmaps of them
        for pair in itertools.combinations(vary_params, 2):

            print(f'Making heatmaps for {pair}')

            other_params_flat = [
                (k, v) for k, v in vary_params_dict.items() if k not in pair]
            other_params = [x[0] for x in other_params_flat]
            other_vals = [x[1] for x in other_params_flat]
            print(f'other params: {other_params}')

            # Create dir for specific pivot
            pivot_name = 'vary_{}_{}'.format(*pair)
            pivot_dir = os.path.join(heatmap_dir, pivot_name)
            os.mkdir(pivot_dir)

            # Select for each of the combos of the other params.
            for other_params_set in itertools.product(*other_vals):

                other_sel_dict = dict(zip(other_params, other_params_set))
                fname_label = path_utils.param_dict_to_fname_str(other_sel_dict)
                df_sel = df.loc[
                    (df[list(other_sel_dict)] == pd.Series(other_sel_dict)).all(axis=1)]

                heatmap_plot(df_sel, *pair, 'mu_best', pivot_dir, label=fname_label)
                heatmap_plot(df_sel, *pair, 'mu_solve_gens', pivot_dir, label=fname_label)
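# A usage sketch mirroring the example in the docstring above, commented out
# so it doesn't run on import; the values are illustrative. This would run
# 3*2 = 6 benchmark combinations, save vary_benchmark_results.csv, and make
# heatmap plots for the (N_hidden_units, act_fn) pair.
#
# constant_params_dict = {
#     'env_name': 'CartPole-v0',
#     'N_gen': 1000,
#     'N_dist': 100,
#     'NN': 'FFNN_multilayer'
# }
# vary_params_dict = {
#     'N_hidden_units': [2, 4, 8],
#     'act_fn': ['tanh', 'relu']
# }
# benchmark_vary_params(constant_params_dict, vary_params_dict)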
import path_utils
import os
import Sample
import gym_raas

# Fill in with the dir (inside the output dir) of the run you want to replot.
replot_dir = ""

dir = os.path.join(path_utils.get_output_dir(), replot_dir)
s = Sample.replot_sample_dict_from_dir(dir)