Example #1
def benchmark_param_dicts(params_dict_list, **kwargs):

    '''
    Pass this a list of dicts, each holding a set of parameters you want
    to benchmark.

    It then iterates through the list, running a benchmark for each dict.
    '''

    # Create dir for the results of this benchmark if one isn't provided.
    benchmark_dir = kwargs.get('benchmark_dir', None)
    if benchmark_dir is None:
        benchmark_dir = os.path.join(path_utils.get_output_dir(), 'Benchmark_{}'.format(path_utils.get_date_str()))
        os.mkdir(benchmark_dir)

    for d in params_dict_list:

        # If a run_fname_label is provided, use that to create a more informative dir name.
        # Otherwise, fall back to 'vary_params' (the date is appended below either way).
        if 'run_fname_label' in d.keys():
            run_fname_label = d['run_fname_label']
        else:
            run_fname_label = 'vary_params'

        # Base dir for this specific benchmark
        params_dir = os.path.join(benchmark_dir, '{}_{}'.format(run_fname_label, path_utils.get_date_str()))
        os.mkdir(params_dir)

        # To hold the actual runs (FF)
        runs_dir = os.path.join(params_dir, 'runs')
        os.mkdir(runs_dir)

        print('\n\nNow benchmarking params:')
        pp.pprint(d, width=1)
        print('\n\n')
        benchmark_dict = benchmark_param_dict(d, kwargs.get('N_dist', 10), kwargs.get('N_gen', 100), runs_dir)

        # Add to dict
        d['benchmark_dict'] = deepcopy(benchmark_dict)

        # Make plots for this benchmark
        if 'run_plot_label' in d.keys():
            run_plot_label = d['run_plot_label']
        else:
            run_plot_label = run_fname_label

        # Plots for benchmark
        fname = os.path.join(params_dir, f'{run_fname_label}_solve_gens_dist.png')
        plot_benchmark_dist(run_plot_label, benchmark_dict['solve_gens'], 'Solve generation', fname)
        fname = os.path.join(params_dir, f'{run_fname_label}_best_scores_dist.png')
        plot_benchmark_dist(run_plot_label, benchmark_dict['best_scores'], 'Best score', fname)

    # Return the passed list, whose dicts have been
    # modified with the results.
    return params_dict_list
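
A minimal usage sketch (hedged): the env_name / NN / N_hidden_units keys below are illustrative and are simply forwarded to benchmark_param_dict(); this function itself only reads run_fname_label, run_plot_label, and the N_dist / N_gen kwargs.

# Hedged usage sketch: each dict gets N_dist runs of N_gen generations, and the
# plots and runs land in a Benchmark_<date> dir under the output dir.
param_dicts = [
    {'env_name': 'CartPole-v0', 'NN': 'FFNN_multilayer',
     'N_hidden_units': 2, 'run_fname_label': 'cartpole_2_units'},
    {'env_name': 'CartPole-v0', 'NN': 'FFNN_multilayer',
     'N_hidden_units': 4, 'run_fname_label': 'cartpole_4_units'},
]
results = benchmark_param_dicts(param_dicts, N_dist=20, N_gen=200)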
Example #2
def benchmark_envs(env_list, **kwargs):

    '''
    Iterates over a list of env names you give it,
    benchmarking each one and recording info.

    For each env, it creates a run directory and benchmarks the parameters
    with benchmark_param_dict(); the stats for all envs are then saved to
    benchmark_stats.json and the per-env distributions are plotted.
    '''

    N_dist = kwargs.get('N_dist', 10) # How many evolutions to run, to form a distribution
    N_gen = kwargs.get('N_gen', 1000)

    # Create dir for the results of this benchmark.
    benchmark_dir = os.path.join(path_utils.get_output_dir(), 'Benchmark_{}'.format(path_utils.get_date_str()))
    os.mkdir(benchmark_dir)

    # Dict to hold results on timing, etc.
    benchmark_dict = {}

    for env_name in env_list:

        print(f'\nBenchmarking env {env_name} now...\n')
        # Create a dir for this env.
        env_dir = os.path.join(benchmark_dir, env_name)
        os.mkdir(env_dir)

        env_runs_dir = os.path.join(env_dir, 'runs')
        os.mkdir(env_runs_dir)

        param_dict = deepcopy(kwargs)
        param_dict['env_name'] = env_name

        benchmark_dict[env_name] = benchmark_param_dict(param_dict, N_dist, N_gen, env_runs_dir)
        benchmark_dict[env_name]['env_dir'] = env_dir


    # Save distributions to file
    with open(os.path.join(benchmark_dir, 'benchmark_stats.json'), 'w+') as f:
        json.dump(benchmark_dict, f, indent=4)

    # Plot each env dist.
    for k,v in benchmark_dict.items():

        # Only plot if the run finished (i.e., solve_gens was recorded).
        if 'solve_gens' in v.keys():
            fname = os.path.join(v['env_dir'], f'{k}_solve_gens_dist.png')
            plot_benchmark_dist(k, v['solve_gens'], 'Solve generation', fname)

            fname = os.path.join(v['env_dir'], f'{k}_best_scores_dist.png')
            plot_benchmark_dist(k, v['best_scores'], 'Best score', fname)

            fname = os.path.join(v['env_dir'], f'{k}_all_scores_dist.png')
            plot_benchmark_dist(k, v['all_scores'], 'All scores', fname, N_bins=20, plot_log=True)
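
A minimal usage sketch, assuming any extra kwargs (the NN settings here are illustrative) are copied into each env's param_dict and forwarded to benchmark_param_dict():

# Hedged usage sketch; the env list and NN kwargs are illustrative.
benchmark_envs(['CartPole-v0', 'MountainCar-v0'],
               N_dist=20, N_gen=500,
               NN='FFNN_multilayer', N_hidden_units=4)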
Example #3
def ramp_difficulty(train_class, **kwargs):

	'''
	This should "ramp up" the difficulty: let the agent solve the easy
	case first, then adapt to progressively harder cat speeds.
	'''

	base_dir = os.path.join(path_utils.get_output_dir(), 'ramp_difficulty_{}'.format(path_utils.get_date_str()))
	os.mkdir(base_dir)

	kwargs['base_dir'] = base_dir

	cm = train_class(**kwargs)

	#cat_speed_rels = [3.0, 3.1, 3.2, 3.3, 3.35, 3.4, 3.45, 3.5, 3.55, 3.6, 3.7, 3.8, 3.9, 4.0]
	N_eps_chunk = kwargs.get('N_eps', 5000)
	cat_speed_rels = {
						3.1 : 1*N_eps_chunk,
						3.2 : 1*N_eps_chunk,
						3.3 : 1*N_eps_chunk,
						3.4 : 2*N_eps_chunk,
						3.5 : 2*N_eps_chunk,
						3.6 : 2*N_eps_chunk,
						3.65 : 2*N_eps_chunk,
						3.7 : 2*N_eps_chunk,
						3.75 : 2*N_eps_chunk,
						3.8 : 2*N_eps_chunk,
					}


	c_s_r_fname = os.path.join(cm.dir, 'c_s_r_list.json')
	with open(c_s_r_fname, 'w+') as f:
		json.dump({'cat_speed_rels' : cat_speed_rels}, f, indent=4)

	for i, (c_s, N_eps) in enumerate(cat_speed_rels.items()):

		print(f'\nRunning with cat_speed_rel = {c_s} now!\n')

		if kwargs.get('reset_buffer', False):
			cm.ER.reset_buffer()

		cm.agent.set_cat_speed(c_s)
		if i==0:
			cm.train(N_eps, kwargs.get('N_steps', 500), reset_hist=True)
		else:
			cm.train(N_eps, kwargs.get('N_steps', 500), reset_hist=False)

	cm.train_epochs.append(cm.total_step)
	cm.plot_train(save_plot=True, show_plot=False)

	cm.save_model()
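
A minimal usage sketch; CatMouse_DDPG (Example #8) is the kind of train_class this expects. N_eps, N_steps, and reset_buffer are the kwargs ramp_difficulty() reads itself; everything is also forwarded to train_class(**kwargs).

# Hedged usage sketch: N_eps sets the per-difficulty episode chunk size.
ramp_difficulty(CatMouse_DDPG, N_eps=5000, N_steps=500, reset_buffer=False)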
Example #4
    def __init__(self, env_name, **kwargs):

        # Create env, create agent
        self.setup_env(env_name)
        self.agent = Agent.Agent(self.env, **kwargs)

        self.noise_sd = 1.0
        self.max_episode_steps = kwargs.get("max_episode_steps", 500)

        # Get the base dir, which is where runs will be saved to. Default
        # is /output/
        base_dir = kwargs.get("base_dir", path_utils.get_output_dir())

        # Datetime string for labeling the run
        self.dt_str = path_utils.get_date_str()

        # If you don't pass anything, it will create a dir in base_dir to
        # hold the results of this run, but you can supply your own externally.
        self.run_dir = kwargs.get("run_dir", None)
        if self.run_dir is None:
            self.run_dir = os.path.join(
                base_dir, f"{self.env_name}_sample_{self.dt_str}")
            os.mkdir(self.run_dir)

        # For saving the parameters used for the run. Run last in __init__().
        if kwargs.get("load_params_from_dir", False):
            self.load_params_dict()
        else:
            self.run_params = kwargs.copy()
            self.save_params_dict()

        #### Plot params
        self.plot_pt_alpha = 0.2
        self.plot_label_params = {"fontsize": 18}
        self.plot_tick_params = {"fontsize": 13}
        self.plot_title_params = {"fontsize": 18}

        self.plot_params = {
            "plot_pt_alpha": self.plot_pt_alpha,
            "plot_label_params": self.plot_label_params,
            "plot_tick_params": self.plot_tick_params,
            "plot_title_params": self.plot_title_params,
        }
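
Assuming this __init__ belongs to the Sample class loaded in Example #6 (the '_sample_' run-dir suffix suggests so), a hedged construction sketch:

# Hedged sketch: if run_dir isn't supplied, a '<env_name>_sample_<date>' dir is
# created under base_dir (default: the output dir) and the run params are saved there.
s = Sample('CartPole-v0', max_episode_steps=500,
           base_dir=path_utils.get_output_dir())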
Example #5
    def __init__(self, env_name, **kwargs):

        # The search method used. Default is Random Weight Guessing (RWG).
        self.search_method = kwargs.get('search_method', 'RWG')
        assert self.search_method in [
            'RWG', 'gaussian_noise_hill_climb', 'grid_search',
            'bin_grid_search', 'sparse_bin_grid_search'
        ], 'Must supply valid search_method!'

        # Create env, create agent
        self.setup_env(env_name)
        self.agent = Agent.Agent(self.env, **kwargs)

        self.noise_sd = 1.0
        self.max_episode_steps = kwargs.get('max_episode_steps', 500)

        # Get the base dir, which is where runs will be saved to. Default
        # is /output/
        base_dir = kwargs.get('base_dir', path_utils.get_output_dir())

        # Datetime string for labeling the run
        self.dt_str = path_utils.get_date_str()

        # If you don't pass anything, it will create a dir in base_dir to
        # hold the results of this run, but you can supply your own externally.
        self.run_dir = kwargs.get('run_dir', None)
        if self.run_dir is None:
            self.run_dir = os.path.join(base_dir,
                                        f'{self.env_name}_evo_{self.dt_str}')
            os.mkdir(self.run_dir)

        # For saving the parameters used for the run. Run last in __init__().
        if kwargs.get('load_params_from_dir', False):
            self.load_params_dict()
        else:
            self.run_params = kwargs.copy()
            self.save_params_dict()

        #### Plot params
        self.plot_pt_alpha = 0.2
        self.plot_label_params = {'fontsize': 14}
        self.plot_tick_params = {'fontsize': 11}
        self.plot_title_params = {'fontsize': 16}
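
The class name isn't shown in this snippet (the '_evo_' run-dir suffix suggests an Evolve-style class); a hedged construction sketch with one of the asserted search methods:

# Hedged sketch; 'Evolve' is an assumed class name for the __init__ above.
# search_method must be one of: 'RWG', 'gaussian_noise_hill_climb',
# 'grid_search', 'bin_grid_search', 'sparse_bin_grid_search'.
e = Evolve('CartPole-v0', search_method='RWG', max_episode_steps=200)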
Example #6
def load_best_agent_sample_from_dir(dir):
    assert os.path.exists(dir), f"Dir must exist to load from! Dir {dir} DNE."

    run_params_json_fname = os.path.join(dir, "run_params.json")
    assert os.path.exists(
        run_params_json_fname
    ), f"run_params.json must exist in dir to load from! {run_params_json_fname} DNE."

    sample_dict_fname = os.path.join(dir, "sample_stats.json")
    assert os.path.exists(
        sample_dict_fname
    ), f"sample_stats.json must exist in dir to load from! {sample_dict_fname} DNE."

    # Get run_params to recreate the object
    with open(run_params_json_fname, "r") as f:
        run_params = json.load(f)

    run_params["run_dir"] = os.path.join(path_utils.get_output_dir(), "tmp")
    del run_params["dt_str"]
    print("\nPassing this dict to create a new Sample():")
    print(run_params)
    print()
    env_name = run_params.pop("env_name")

    s = Sample(env_name, **run_params)

    with open(sample_dict_fname, "r") as f:
        sample_dict = json.load(f)

    print(sample_dict.keys())
    best_weights = sample_dict["best_weights"]
    print("\nLoading best weights:")
    print(best_weights)
    best_weights_mat = [np.array(w) for w in best_weights]
    s.agent.set_weight_matrix(best_weights_mat)

    return s
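
Example #9 below is a minimal driver script for this loader: it recreates the Sample, loads the best weights, and runs an episode with show_ep=True.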
Example #7
    'LR': 2 * 10**-4,
    'N_batch': 5,
    'optim': 'Adam',
    'beta_entropy': 10**-5,
    'sigma_min': 10**-5,
    'clamp_grad': 10,
    'cat_speed_rel': 3.2,
    'noise_sigma': 0.5
}

vary_dict = {'cat_speed_rel': [3.3, 3.4], 'LR': [2 * 10**-3]}

rt.hyperparam_search_const(aux_functions.run_train_sequence,
                           center_dict,
                           vary_dict,
                           path_utils.get_output_dir(),
                           N_runs=5,
                           save_R=True,
                           center_run=True)

#################################### DDPG difficulty tests

center_dict = {
    'train_class': CatMouse_DDPG,
    'N_eps': 20000,
    'noise_sigma': 0.5,
    'max_ep_steps': 100,
    'N_steps': 100,
    'decay_noise': True,
    'cat_speed_rel': 3.0
}
Example #8
'''

DDPG implementation.


'''

default_kwargs = {
    'gamma': 0.99,
    'optim': 'Adam',
    'LR_actor': 1 * 10**-4,
    'LR_critic': 1 * 10**-3,
    'clamp_grad': 10000.0,
    'hidden_size': 200,
    'init_weights': True,
    'base_dir': path_utils.get_output_dir(),
    'noise_sigma': 0.2,
    'noise_theta': 0.15,
    'noise_dt': 10**-2,
    'noise_mu': 'zero',
    'noise_decay_limit': 10**-3,
    'max_buffer_size': 10**6,
    'ER_batch_size': 64,
    'tau': 0.001,
    'decay_noise': True,
    'noise_method': 'OU'
}


class CatMouse_DDPG:
    def __init__(self, **kwargs):
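
The snippet cuts off at __init__; a hedged construction sketch, assuming kwargs not supplied fall back to the default_kwargs above:

# Hedged sketch; noise_sigma and decay_noise come from default_kwargs /
# Example #7, and base_dir defaults to the output dir.
cm = CatMouse_DDPG(noise_sigma=0.5, decay_noise=True,
                   base_dir=path_utils.get_output_dir())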
Example #9
import path_utils
import os
import Sample

best_agent_dir = ""

dir = os.path.join(path_utils.get_output_dir(), best_agent_dir)

s = Sample.load_best_agent_sample_from_dir(dir)

s.run_episode(show_ep=True)
Example #10
    # Append even if there was no improvement
    best_scores.append(best_score)

# Plot results
print(f'Best score achieved: {best_score}')
print(f'Best weight matrix: \n{best_weights}')

plt.plot(best_scores, color='tomato', label='Best FF found')
plt.plot(all_scores, color='dodgerblue', label='All FF')

plt.xlabel('Episode')
plt.ylabel('Fitness Function (FF)')
plt.legend()
plt.title('CartPole-v0 environment')
plt.savefig(os.path.join(path_utils.get_output_dir(), 'NE_cartpole_FF.png'))
plt.show()

#env = gym.wrappers.Monitor(env, 'video', force = True)
# Set to best weights found, run episode and show
net.set_weights(best_weights)
obs = env.reset()
score = 0
done = False
while not done:
    env.render()
    action = net.get_action(obs)
    obs, rew, done, info = env.step(action)
    score += rew

print(f'Final score: {score}')
Example #11
def benchmark_vary_params(constant_params_dict, vary_params_dict, **kwargs):

    '''
    This is a convenience function to easily vary parameters for benchmarking.
    You pass it constant_params_dict, which is a dict with the values that
    you want to remain constant between runs. Then, pass it vary_params_dict,
    which should map each parameter that you want to vary to a list of the values
    it should take.

    Example:

    constant_params_dict = {
        'env_name' : 'CartPole-v0',
        'N_gen' : 1000,
        'N_dist' : 100,
        'NN' : 'FFNN_multilayer'
    }

    vary_params_dict = {
        'N_hidden_units' : [2, 4, 8],
        'act_fn' : ['tanh', 'relu']
    }

    This will do 3*2 = 6 runs, for each of the combinations of varying parameters.
    '''

    # Create informative dir name
    vary_params = list(vary_params_dict.keys())
    benchmark_dir = os.path.join(
                        path_utils.get_output_dir(),
                        'Benchmark_vary_{}_{}'.format('_'.join(vary_params), path_utils.get_date_str()))
    print(f'\nSaving benchmark run to {benchmark_dir}')
    os.mkdir(benchmark_dir)

    combined_params = {**constant_params_dict, **vary_params_dict}
    # Save params to file
    with open(os.path.join(benchmark_dir, 'run_params.json'), 'w+') as f:
        json.dump(combined_params, f, indent=4)


    # Expand the cross product of varied params into a flat list of dicts,
    # then benchmark each one.
    flat_param_list = vary_params_cross_products(constant_params_dict, vary_params_dict)
    flat_param_list = benchmark_param_dicts(flat_param_list, benchmark_dir=benchmark_dir, **kwargs)

    # Parse results
    for d in flat_param_list:
        benchmark_dict = d['benchmark_dict']

        best_scores = benchmark_dict['best_scores']
        d['mu_best'] = np.mean(best_scores)
        d['sigma_best'] = np.std(best_scores)

        solve_gens = benchmark_dict['solve_gens']
        d['mu_solve_gens'] = np.mean(solve_gens)
        d['sigma_solve_gens'] = np.std(solve_gens)

        #pp.pprint(d, width=1)
        # Drop the raw benchmark data now that it has been summarized.
        d.pop('benchmark_dict')

    # Save results to csv for later parsing/plotting
    df = pd.DataFrame(flat_param_list)
    print(tabulate(df, headers=df.columns.values, tablefmt='psql'))
    df_fname = os.path.join(benchmark_dir, 'vary_benchmark_results.csv')
    df.to_csv(df_fname, index=False)

    # Only needed if two or more params were varied.
    if len(vary_params) >= 2:

        # Create heatmap plots dir
        heatmap_dir = os.path.join(benchmark_dir, 'heatmap_plots')
        print(f'\nSaving heatmap plots to {heatmap_dir}')
        os.mkdir(heatmap_dir)

        # Iterate over all unique pairs of vary params, plot heatmaps of them
        for pair in itertools.combinations(vary_params, 2):

            print(f'Making heatmaps for {pair}')

            other_params_flat = [(k, v) for k,v in vary_params_dict.items() if k not in pair]
            other_params = [x[0] for x in other_params_flat]
            other_vals = [x[1] for x in other_params_flat]
            print(f'other params: {other_params}')

            # Create dir for specific pivot
            pivot_name = 'vary_{}_{}'.format(*pair)
            pivot_dir = os.path.join(heatmap_dir, pivot_name)
            os.mkdir(pivot_dir)

            # Select for each of the combos of the other params.
            for other_params_set in itertools.product(*other_vals):
                other_sel_dict = dict(zip(other_params, other_params_set))
                fname_label = path_utils.param_dict_to_fname_str(other_sel_dict)
                df_sel = df.loc[(df[list(other_sel_dict)] == pd.Series(other_sel_dict)).all(axis=1)]

                heatmap_plot(df_sel, *pair, 'mu_best', pivot_dir, label=fname_label)
                heatmap_plot(df_sel, *pair, 'mu_solve_gens', pivot_dir, label=fname_label)
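
A minimal driver sketch, reusing the dicts from the docstring example above:

# Hedged driver sketch: 3 * 2 = 6 combinations are benchmarked and the summary
# table is written to vary_benchmark_results.csv in the benchmark dir.
constant_params_dict = {'env_name': 'CartPole-v0', 'N_gen': 1000,
                        'N_dist': 100, 'NN': 'FFNN_multilayer'}
vary_params_dict = {'N_hidden_units': [2, 4, 8],
                    'act_fn': ['tanh', 'relu']}
benchmark_vary_params(constant_params_dict, vary_params_dict)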
Example #12
import path_utils
import os
import Sample

import gym_raas

replot_dir = ""

dir = os.path.join(path_utils.get_output_dir(), replot_dir)

s = Sample.replot_sample_dict_from_dir(dir)