def __init__(self, **kwargs):
    global default_kwargs

    self.agent = CatMouseAgent(**kwargs)
    self.N_state_terms = len(self.agent.getStateVec())
    self.N_actions = self.agent.N_actions

    # Recognized kwargs overwrite default_kwargs; warn about anything else.
    for k, v in kwargs.items():
        if k in default_kwargs.keys():
            default_kwargs[k] = v
        else:
            print(f'Passed parameter {k} not in default_kwargs dict! Check')

    self.fname_base = 'CatMouse_DDPG_' + path_utils.get_date_str()
    if kwargs.get('dir', None) is None:
        self.dir = os.path.join(default_kwargs['base_dir'], self.fname_base)
        os.mkdir(self.dir)
        print(f'\n\nMade dir {self.dir} for run...\n\n')
    else:
        self.dir = kwargs.get('dir')

    self.save_params_json(kwargs, 'params_passed')
    self.save_params_json(default_kwargs, 'params_all')

    self.gamma = default_kwargs['gamma']
    self.optim = default_kwargs['optim']
    self.LR_actor = default_kwargs['LR_actor']
    self.LR_critic = default_kwargs['LR_critic']
    self.hidden_size = default_kwargs['hidden_size']
    self.init_weights = default_kwargs['init_weights']
    self.clamp_grad = default_kwargs['clamp_grad']

    self.noise_sigma = default_kwargs['noise_sigma']
    self.noise_theta = default_kwargs['noise_theta']
    self.noise_dt = default_kwargs['noise_dt']
    self.noise_mu = default_kwargs['noise_mu']
    self.noise_decay_limit = default_kwargs['noise_decay_limit']

    self.max_buffer_size = default_kwargs['max_buffer_size']
    self.ER_batch_size = default_kwargs['ER_batch_size']
    self.tau = default_kwargs['tau']
    self.decay_noise = default_kwargs['decay_noise']
    self.noise_method = default_kwargs['noise_method']

    self.setup_NN()

    self.dat_fname_file = None

    if 'base_dir' in default_kwargs.keys():
        # Drop base_dir so it doesn't clutter the plot-title label string.
        no_base_dir_kwargs = deepcopy(default_kwargs)
        del no_base_dir_kwargs['base_dir']
        self.train_plot_title = path_utils.linebreak_every_n_spaces(
            path_utils.param_dict_to_label_str(no_base_dir_kwargs))

    self.ER = ExpReplay(self.max_buffer_size, self.ER_batch_size)
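# A minimal construction sketch. The enclosing class name isn't shown in this
# excerpt, so `CatMouseDDPG` is a hypothetical placeholder; the kwarg values
# are illustrative, but the keys mirror the default_kwargs entries read above:
#
#   cm = CatMouseDDPG(gamma=0.99, LR_actor=1e-4, LR_critic=1e-3,
#                     noise_sigma=0.2, max_buffer_size=100000)
#   # Recognized keys overwrite default_kwargs; unrecognized keys only
#   # print a warning.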
def save_title_db_dict(d):
    # Use the id of the root node (the entry at depth 0) to label the file.
    root_id = [v['id'] for v in d.values() if v['depth'] == 0][0]
    date_str = path_utils.get_date_str()
    fname = 'title_dict_id_{}_runtime_{}.json'.format(root_id, date_str)
    with open(fname, 'w') as f:
        json.dump(d, f, indent=4)
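# Sketch of the expected input shape (values illustrative; the function only
# requires each entry to have 'id' and 'depth' keys, with exactly one entry
# at depth 0):
#
#   d = {
#       'node_0': {'id': 17, 'depth': 0},
#       'node_1': {'id': 18, 'depth': 1},
#   }
#   save_title_db_dict(d)  # -> title_dict_id_17_runtime_<date>.json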
def benchmark_param_dicts(params_dict_list, **kwargs):
    '''
    Pass this a list of dicts, where each has the different parameters you
    want to benchmark. It then iterates through this list, doing a benchmark
    for each dict.
    '''

    # Create dir for the results of this benchmark if one isn't provided.
    benchmark_dir = kwargs.get('benchmark_dir', None)
    if benchmark_dir is None:
        benchmark_dir = os.path.join(
            path_utils.get_output_dir(),
            'Benchmark_{}'.format(path_utils.get_date_str()))
        os.mkdir(benchmark_dir)

    for d in params_dict_list:

        # If a run_fname_label is provided, use that to create a more
        # informative dir name. Otherwise, just use the date.
        run_fname_label = d.get('run_fname_label', 'vary_params')

        # Base dir for this specific benchmark
        params_dir = os.path.join(
            benchmark_dir,
            '{}_{}'.format(run_fname_label, path_utils.get_date_str()))
        os.mkdir(params_dir)

        # To hold the actual runs (FF)
        runs_dir = os.path.join(params_dir, 'runs')
        os.mkdir(runs_dir)

        print('\n\nNow benchmarking params:')
        pp.pprint(d, width=1)
        print('\n\n')

        benchmark_dict = benchmark_param_dict(
            d, kwargs.get('N_dist', 10), kwargs.get('N_gen', 100), runs_dir)

        # Add results to dict
        d['benchmark_dict'] = deepcopy(benchmark_dict)

        # Label to use in plots; fall back to the fname label.
        run_plot_label = d.get('run_plot_label', run_fname_label)

        # Plots for benchmark
        fname = os.path.join(params_dir, f'{run_fname_label}_solve_gens_dist.png')
        plot_benchmark_dist(run_plot_label, benchmark_dict['solve_gens'],
                            'Solve generation', fname)

        fname = os.path.join(params_dir, f'{run_fname_label}_best_scores_dist.png')
        plot_benchmark_dist(run_plot_label, benchmark_dict['best_scores'],
                            'Best score', fname)

    # Return passed list, whose dicts have been
    # modified with the results.
    return params_dict_list
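# A minimal usage sketch for benchmark_param_dicts(). The param-dict keys
# here are illustrative (whatever benchmark_param_dict() expects); N_dist
# and N_gen are the kwargs read above:
#
#   params = [
#       {'env_name': 'CartPole-v0', 'run_fname_label': 'cartpole_test'},
#       {'env_name': 'Acrobot-v1', 'run_fname_label': 'acrobot_test'},
#   ]
#   results = benchmark_param_dicts(params, N_dist=5, N_gen=50)
#   # Each returned dict now carries a 'benchmark_dict' with its results.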
def benchmark_envs(env_list, **kwargs):
    '''
    Iterates over a list of env names you give it, benchmarking each and
    recording info. For each env, it creates a dir, runs the benchmark,
    and saves the distribution plots.
    '''

    N_dist = kwargs.get('N_dist', 10)  # How many evolutions to run, to form a distribution
    N_gen = kwargs.get('N_gen', 1000)

    # Create dir for the results of this benchmark.
    benchmark_dir = os.path.join(
        path_utils.get_output_dir(),
        'Benchmark_{}'.format(path_utils.get_date_str()))
    os.mkdir(benchmark_dir)

    # Dict to hold results on timing, etc.
    benchmark_dict = {}
    for env_name in env_list:
        print(f'\nBenchmarking env {env_name} now...\n')

        # Create a dir for this env.
        env_dir = os.path.join(benchmark_dir, env_name)
        os.mkdir(env_dir)
        env_runs_dir = os.path.join(env_dir, 'runs')
        os.mkdir(env_runs_dir)

        param_dict = deepcopy(kwargs)
        param_dict['env_name'] = env_name
        benchmark_dict[env_name] = benchmark_param_dict(
            param_dict, N_dist, N_gen, env_runs_dir)
        benchmark_dict[env_name]['env_dir'] = env_dir

    # Save distributions to file
    with open(os.path.join(benchmark_dir, 'benchmark_stats.json'), 'w+') as f:
        json.dump(benchmark_dict, f, indent=4)

    # Plot each env dist.
    for k, v in benchmark_dict.items():
        # Make sure the run finished
        if 'solve_gens' in v.keys():
            fname = os.path.join(v['env_dir'], f'{k}_solve_gens_dist.png')
            plot_benchmark_dist(k, v['solve_gens'], 'Solve generation', fname)

            fname = os.path.join(v['env_dir'], f'{k}_best_scores_dist.png')
            plot_benchmark_dist(k, v['best_scores'], 'Best score', fname)

            fname = os.path.join(v['env_dir'], f'{k}_all_scores_dist.png')
            plot_benchmark_dist(k, v['all_scores'], 'All scores', fname,
                                N_bins=20, plot_log=True)
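# A usage sketch, assuming gym-style env names (N_dist and N_gen as read
# above; any other kwargs are forwarded to each env's param dict):
#
#   benchmark_envs(['CartPole-v0', 'Pendulum-v0'], N_dist=5, N_gen=200)
#   # Writes benchmark_stats.json plus per-env distribution plots under
#   # a new Benchmark_<date> dir.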
def ramp_difficulty(train_class, **kwargs):
    '''
    "Ramps up" the difficulty: the agent first solves the easy case, then
    adapts to progressively harder cases.
    '''

    base_dir = os.path.join(
        path_utils.get_output_dir(),
        'ramp_difficulty_{}'.format(path_utils.get_date_str()))
    os.mkdir(base_dir)
    kwargs['base_dir'] = base_dir

    cm = train_class(**kwargs)

    N_eps_chunk = kwargs.get('N_eps', 5000)

    # Map each cat_speed_rel value to the number of episodes to train at it.
    cat_speed_rels = {
        3.1: 1 * N_eps_chunk,
        3.2: 1 * N_eps_chunk,
        3.3: 1 * N_eps_chunk,
        3.4: 2 * N_eps_chunk,
        3.5: 2 * N_eps_chunk,
        3.6: 2 * N_eps_chunk,
        3.65: 2 * N_eps_chunk,
        3.7: 2 * N_eps_chunk,
        3.75: 2 * N_eps_chunk,
        3.8: 2 * N_eps_chunk,
    }

    c_s_r_fname = os.path.join(cm.dir, 'c_s_r_list.json')
    with open(c_s_r_fname, 'w+') as f:
        json.dump({'cat_speed_rels': cat_speed_rels}, f, indent=4)

    for i, (c_s, N_eps) in enumerate(cat_speed_rels.items()):
        print(f'\nRunning with cat_speed_rel = {c_s} now!\n')

        if kwargs.get('reset_buffer', False):
            cm.ER.reset_buffer()

        cm.agent.set_cat_speed(c_s)

        # Only reset the training history for the first difficulty.
        cm.train(N_eps, kwargs.get('N_steps', 500), reset_hist=(i == 0))

        cm.train_epochs.append(cm.total_step)

    cm.plot_train(save_plot=True, show_plot=False)
    cm.save_model()
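# A usage sketch. The train-class name is hypothetical; any class whose
# __init__ takes **kwargs and that exposes .train(), .agent.set_cat_speed(),
# .ER.reset_buffer(), .plot_train() and .save_model() as used above will do:
#
#   ramp_difficulty(CatMouseDDPG, N_eps=2000, N_steps=500, reset_buffer=True)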
def __init__(self, **kwargs):
    global default_kwargs

    self.agent = CatMouseAgent(**kwargs)
    self.N_state_terms = len(self.agent.getStateVec())
    self.N_actions = self.agent.N_actions

    # Recognized kwargs overwrite default_kwargs. 'fname_note' is an
    # expected extra key (used to label runs), so don't warn about it;
    # anything else unrecognized is probably a typo.
    for k, v in kwargs.items():
        if k in default_kwargs.keys():
            default_kwargs[k] = v
        elif k != 'fname_note':
            print(f'Passed parameter {k} not in default_kwargs dict! Check')

    self.fname_base = 'CatMouse_A2C_' + path_utils.get_date_str()
    self.dir = os.path.join(default_kwargs['base_dir'], self.fname_base)
    os.mkdir(self.dir)
    print(f'\n\nMade dir {self.dir} for run...\n\n')

    self.save_params_json(kwargs, 'params_passed')
    self.save_params_json(default_kwargs, 'params_all')

    self.N_batch = default_kwargs['N_batch']
    self.gamma = default_kwargs['gamma']
    self.beta_entropy = default_kwargs['beta_entropy']
    self.optim = default_kwargs['optim']
    self.LR = default_kwargs['LR']
    self.hidden_size = default_kwargs['hidden_size']
    self.clamp_grad = default_kwargs['clamp_grad']

    self.noise_sigma = default_kwargs['noise_sigma']
    self.noise_theta = default_kwargs['noise_theta']
    self.noise_dt = default_kwargs['noise_dt']
    self.sigma_min = default_kwargs['sigma_min']
    self.decay_noise = default_kwargs['decay_noise']

    self.setup_NN()

    self.dat_fname_file = None

    if 'base_dir' in default_kwargs.keys():
        # Drop base_dir so it doesn't clutter the plot-title label string.
        no_base_dir_kwargs = deepcopy(default_kwargs)
        del no_base_dir_kwargs['base_dir']
        self.train_plot_title = path_utils.linebreak_every_n_spaces(
            path_utils.param_dict_to_label_str(no_base_dir_kwargs))
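# Construction sketch, analogous to the DDPG version above (the class name
# `CatMouseA2C` is a hypothetical placeholder; values are illustrative, but
# the keys mirror the default_kwargs entries read above):
#
#   cm = CatMouseA2C(N_batch=32, gamma=0.99, beta_entropy=0.01, LR=1e-3)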
def __init__(self, env_name, **kwargs):

    # Create env, create agent
    self.setup_env(env_name)
    self.agent = Agent.Agent(self.env, **kwargs)

    self.noise_sd = 1.0
    self.max_episode_steps = kwargs.get("max_episode_steps", 500)

    # Get the base dir, which is where runs will be saved to. Default
    # is /output/.
    base_dir = kwargs.get("base_dir", path_utils.get_output_dir())

    # Datetime string for labeling the run
    self.dt_str = path_utils.get_date_str()

    # If you don't pass anything, it will create a dir in base_dir to
    # hold the results of this run, but you can supply your own externally.
    self.run_dir = kwargs.get("run_dir", None)
    if self.run_dir is None:
        self.run_dir = os.path.join(
            base_dir, f"{self.env_name}_sample_{self.dt_str}")
        os.mkdir(self.run_dir)

    # For saving the parameters used for the run. Run last in __init__().
    if kwargs.get("load_params_from_dir", False):
        self.load_params_dict()
    else:
        self.run_params = kwargs.copy()
        self.save_params_dict()

    #### Plot params
    self.plot_pt_alpha = 0.2
    self.plot_label_params = {"fontsize": 18}
    self.plot_tick_params = {"fontsize": 13}
    self.plot_title_params = {"fontsize": 18}

    self.plot_params = {
        "plot_pt_alpha": self.plot_pt_alpha,
        "plot_label_params": self.plot_label_params,
        "plot_tick_params": self.plot_tick_params,
        "plot_title_params": self.plot_title_params,
    }
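# Construction sketch for the Sample class (values illustrative; a fuller
# runnable example appears in the make_gifs script further below):
#
#   e = Sample("Pendulum-v0", NN="FFNN", N_hidden_units=4, base_dir=my_dir)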
def __init__(self, env_name, **kwargs):

    # The search method used. Default is Random Weight Guessing (RWG).
    self.search_method = kwargs.get('search_method', 'RWG')
    assert self.search_method in [
        'RWG',
        'gaussian_noise_hill_climb',
        'grid_search',
        'bin_grid_search',
        'sparse_bin_grid_search'
    ], 'Must supply valid search_method!'

    # Create env, create agent
    self.setup_env(env_name)
    self.agent = Agent.Agent(self.env, **kwargs)

    self.noise_sd = 1.0
    self.max_episode_steps = kwargs.get('max_episode_steps', 500)

    # Get the base dir, which is where runs will be saved to. Default
    # is /output/.
    base_dir = kwargs.get('base_dir', path_utils.get_output_dir())

    # Datetime string for labeling the run
    self.dt_str = path_utils.get_date_str()

    # If you don't pass anything, it will create a dir in base_dir to
    # hold the results of this run, but you can supply your own externally.
    self.run_dir = kwargs.get('run_dir', None)
    if self.run_dir is None:
        self.run_dir = os.path.join(base_dir, f'{self.env_name}_evo_{self.dt_str}')
        os.mkdir(self.run_dir)

    # For saving the parameters used for the run. Run last in __init__().
    if kwargs.get('load_params_from_dir', False):
        self.load_params_dict()
    else:
        self.run_params = kwargs.copy()
        self.save_params_dict()

    #### Plot params
    self.plot_pt_alpha = 0.2
    self.plot_label_params = {'fontsize': 14}
    self.plot_tick_params = {'fontsize': 11}
    self.plot_title_params = {'fontsize': 16}
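# Construction sketch. The enclosing class name isn't shown in this excerpt
# (its run dirs are labeled `_evo_`), so `Evolve` is a hypothetical
# placeholder; search_method must be one of the options asserted above:
#
#   evo = Evolve('CartPole-v0', search_method='RWG', max_episode_steps=500)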
def no_train_episode(self, N_steps):
    '''
    Runs a single evaluation episode (no gradient updates), then saves the
    trajectory to file.

    The GD is done with:
        -v_buffer (batch of V for each state)
        -sigma_buffer (batch of sd for each state)
        -r_buffer (batch of returned r for each action taken in each state)
        -pi_a_buffer (batch of pi_a for each action taken in each state)

    plot_episode() plots:
        -self.last_ep_*_hist (history of * over course of episode)

    Other buffers are saved but not used (for debugging):
        -s_buffer
        -a_buffer
        -mu_buffer
    '''
    self.agent.initEpisode()

    s_ep_hist = []
    R_ep_hist = []

    episode_ending = None
    tot_steps = 0
    for t in range(N_steps):
        s = self.agent.getStateVec()

        # Get action from actor NN
        a = self.NN_actor.forward(
            torch.tensor(s, dtype=torch.float)).detach().numpy()

        # Iterate, get r, s_next
        r, s_next, done = self.agent.iterate(a)

        s_ep_hist.append(s)
        R_ep_hist.append(r)
        tot_steps = t

        if done:
            s_ep_hist.append(s_next)
            break

    # Infer how the episode ended from the final reward.
    last_r = R_ep_hist[-1]
    if last_r > 0.5:
        episode_ending = 'escaped'
    elif last_r < -0.2:
        episode_ending = 'caught'
    else:
        episode_ending = None

    traj_fname = plot_tools.save_traj_to_file(
        np.array(s_ep_hist),
        self.dir,
        iter='eval_ep_{}'.format(path_utils.get_date_str()),
        cat_speed_rel=self.agent.cat_speed_rel,
        mouse_caught=episode_ending)

    return {
        'traj_fname': traj_fname,
        'mouse_caught': episode_ending,
        'tot_steps': tot_steps,
    }
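# Evaluation sketch (assuming `cm` is a trained instance of this class):
#
#   stats = cm.no_train_episode(500)
#   print(stats['mouse_caught'], stats['tot_steps'])
#   # stats['traj_fname'] points at the saved trajectory file.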
def benchmark_vary_params(constant_params_dict, vary_params_dict, **kwargs):
    '''
    Convenience function to easily vary parameters for benchmarking.

    Pass it constant_params_dict, a dict with the values that you want to
    remain constant between runs. Then pass it vary_params_dict, which
    should map each parameter you want to vary to a list of the values it
    should take.

    Example:

    constant_params_dict = {
        'env_name' : 'CartPole-v0',
        'N_gen' : 1000,
        'N_dist' : 100,
        'NN' : 'FFNN_multilayer'
    }

    vary_params_dict = {
        'N_hidden_units' : [2, 4, 8],
        'act_fn' : ['tanh', 'relu']
    }

    This will do 3*2 = 6 runs, one for each combination of the varying
    parameters.
    '''

    # Create informative dir name
    vary_params = list(vary_params_dict.keys())
    benchmark_dir = os.path.join(
        path_utils.get_output_dir(),
        'Benchmark_vary_{}_{}'.format('_'.join(vary_params),
                                      path_utils.get_date_str()))
    print(f'\nSaving benchmark run to {benchmark_dir}')
    os.mkdir(benchmark_dir)

    combined_params = {**constant_params_dict, **vary_params_dict}

    # Save params to file
    with open(os.path.join(benchmark_dir, 'run_params.json'), 'w+') as f:
        json.dump(combined_params, f, indent=4)

    # Flatten list, pass to other function
    flat_param_list = vary_params_cross_products(constant_params_dict,
                                                 vary_params_dict)
    flat_param_list = benchmark_param_dicts(flat_param_list,
                                            benchmark_dir=benchmark_dir,
                                            **kwargs)

    # Parse results
    for d in flat_param_list:
        benchmark_dict = d['benchmark_dict']

        best_scores = benchmark_dict['best_scores']
        d['mu_best'] = np.mean(best_scores)
        d['sigma_best'] = np.std(best_scores)

        solve_gens = benchmark_dict['solve_gens']
        d['mu_solve_gens'] = np.mean(solve_gens)
        d['sigma_solve_gens'] = np.std(solve_gens)

        # Don't need the full results dict anymore
        d.pop('benchmark_dict')

    # Save results to csv for later parsing/plotting
    df = pd.DataFrame(flat_param_list)
    print(tabulate(df, headers=df.columns.values, tablefmt='psql'))
    df_fname = os.path.join(benchmark_dir, 'vary_benchmark_results.csv')
    df.to_csv(df_fname, index=False)

    # Heatmaps only make sense if at least two params were varied.
    if len(vary_params) >= 2:

        # Create heatmap plots dir
        heatmap_dir = os.path.join(benchmark_dir, 'heatmap_plots')
        print(f'\nSaving heatmap plots to {heatmap_dir}')
        os.mkdir(heatmap_dir)

        # Iterate over all unique pairs of vary params, plot heatmaps of them
        for pair in itertools.combinations(vary_params, 2):
            print(f'Making heatmaps for {pair}')

            other_params_flat = [(k, v) for k, v in vary_params_dict.items()
                                 if k not in pair]
            other_params = [x[0] for x in other_params_flat]
            other_vals = [x[1] for x in other_params_flat]
            print(f'other params: {other_params}')

            # Create dir for this specific pair
            pivot_name = 'vary_{}_{}'.format(*pair)
            pivot_dir = os.path.join(heatmap_dir, pivot_name)
            os.mkdir(pivot_dir)

            # Select for each combination of the other params.
            for other_params_set in itertools.product(*other_vals):
                other_sel_dict = dict(zip(other_params, other_params_set))
                fname_label = path_utils.param_dict_to_fname_str(other_sel_dict)
                df_sel = df.loc[(df[list(other_sel_dict)] ==
                                 pd.Series(other_sel_dict)).all(axis=1)]

                heatmap_plot(df_sel, *pair, 'mu_best', pivot_dir,
                             label=fname_label)
                heatmap_plot(df_sel, *pair, 'mu_solve_gens', pivot_dir,
                             label=fname_label)
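# A usage sketch reusing the docstring's example dicts:
#
#   benchmark_vary_params(
#       {'env_name': 'CartPole-v0', 'N_gen': 1000, 'N_dist': 100,
#        'NN': 'FFNN_multilayer'},
#       {'N_hidden_units': [2, 4, 8], 'act_fn': ['tanh', 'relu']})
#   # Produces vary_benchmark_results.csv plus, since two params are
#   # varied here, mu_best / mu_solve_gens heatmaps for the pair.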
import os

import path_utils
from Sample import Sample

# env_name = 'CartPole-v0'
# env_name = 'MountainCarContinuous-v0'
# env_name = 'Acrobot-v1'
env_name = "Pendulum-v0"

base_dir = os.path.join(
    path_utils.get_output_dir(),
    "make_gifs_" + env_name + "_" + path_utils.get_date_str(),
)
os.mkdir(base_dir)

e = Sample(
    env_name,
    NN="FFNN",
    N_hidden_layers=1,
    N_hidden_units=4,
    use_bias=True,
    base_dir=base_dir,
    random_dist="uniform",
    random_dist_scaling=10.0,
)

sample_dict = e.sample(10000, N_episodes=10, print_samp_num=True)
e.save_all_sample_stats(sample_dict)

print(sample_dict["best_weights"])
def hyperparam_search_const(run_fn, center_dict, vary_param_dict, output_dir, **kwargs):
    '''
    For when you have a "center" dict of kwargs (which could be seen as the
    "control"), and you want to vary other parameters one at a time while
    keeping the rest constant.

    So pass it something like:
    center_dict = {'param1' : 5, 'param2' : 'g', 'param3' : 492}, and
    vary_param_dict = {'param1' : [2, 10], 'param2' : ['h', 'q', 'z']},
    and it will try the center values, and then the 5 other variants.

    The function that will be run, run_fn, has to take only kwargs. You'll
    probably have to write an external wrapper for it if you want to do
    something like create an object of a class and run a few of its
    functions.

    output_dir is the path this function will build the whole tree
    structure in (it will create run_dir inside it).

    This isn't fully general at the moment because of save_param_R_curves;
    it should be expanded so that fn is passed as an arg.
    '''

    # How many runs of each set of params it will do
    N_runs = kwargs.get('N_runs', 1)
    save_R = kwargs.get('save_R', False)

    # Create the run_dir for all the vary_params
    run_dir = os.path.join(output_dir, 'vary_param_{}'.format(path_utils.get_date_str()))
    os.mkdir(run_dir)

    # Whether to do the center run or not
    center_run = kwargs.get('center_run', True)
    if center_run:
        print('\n\nRunning with center...')
        pp.pprint(center_dict, width=1)

        # Create dir for center run
        center_dir = os.path.join(run_dir, 'center_runs')
        os.mkdir(center_dir)

        param_R_curves = []

        # Deepcopy the center_dict, so varying the copy doesn't mess up anything.
        d = deepcopy(center_dict)
        d['base_dir'] = center_dir

        for i in range(N_runs):
            R_curve = run_fn(**d)
            param_R_curves.append(R_curve)

        if save_R:
            save_param_R_curves(param_R_curves, d, center_dir)

    for param, param_list in vary_param_dict.items():
        print('\n\nNow varying parameter: ', param)

        vary_dir = os.path.join(run_dir, 'vary_{}'.format(param))
        os.mkdir(vary_dir)

        for v in param_list:
            param_R_curves = []

            d = deepcopy(center_dict)
            d[param] = v
            d['base_dir'] = vary_dir
            d['fname_note'] = '{}={}'.format(param, v)

            print('\n\nNow running with parameter {} = {}\n'.format(param, v))
            pp.pprint(d, width=1)

            for i in range(N_runs):
                R_curve = run_fn(**d)
                param_R_curves.append(R_curve)

            if save_R:
                save_param_R_curves(param_R_curves, {param : v}, vary_dir)
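# A usage sketch with the docstring's example dicts (run_fn must accept only
# kwargs and return an R curve; `my_run_fn` is hypothetical):
#
#   hyperparam_search_const(
#       my_run_fn,
#       {'param1': 5, 'param2': 'g', 'param3': 492},
#       {'param1': [2, 10], 'param2': ['h', 'q', 'z']},
#       path_utils.get_output_dir(),
#       N_runs=3, save_R=True)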