def benchmark_trpo(self):  # pylint: disable=no-self-use
    """Benchmark TRPO: garage-TensorFlow vs. garage-PyTorch.

    For every MuJoCo1M task, both implementations are run for the
    configured number of trials (one random seed per trial).  The
    per-trial progress CSVs are then aggregated into a comparison plot
    and a JSON entry, and all results are written out at the end.
    """
    mujoco1m = benchmarks.get_benchmark('Mujoco1M')
    timestamp = datetime.datetime.now().strftime('%Y-%m-%d-%H-%M-%S-%f')
    benchmark_dir = './data/local/benchmarks/trpo/%s/' % timestamp
    result_json = {}
    for task in mujoco1m['tasks']:
        env_id = task['env_id']
        env = gym.make(env_id)
        num_trials = hyper_parameters['n_trials']
        seeds = random.sample(range(100), num_trials)
        task_dir = osp.join(benchmark_dir, env_id)
        plt_file = osp.join(benchmark_dir,
                            '{}_benchmark.png'.format(env_id))
        garage_tf_csvs = []
        garage_pytorch_csvs = []
        for trial_idx, seed in enumerate(seeds):
            _PLACEHOLDER_CACHE.clear()
            trial_dir = task_dir + '/trial_%d_seed_%d' % (trial_idx + 1,
                                                          seed)
            garage_tf_dir = trial_dir + '/garage_tf'
            garage_pytorch_dir = trial_dir + '/garage_pytorch'
            # PyTorch run first; it needs no TF graph.
            env.reset()
            garage_pytorch_csv = run_garage_pytorch(env, seed,
                                                    garage_pytorch_dir)
            # A fresh graph isolates each TF run from the previous trial.
            # pylint: disable=not-context-manager
            with tf.Graph().as_default():
                env.reset()
                garage_tf_csv = run_garage_tf(env, seed, garage_tf_dir)
            garage_tf_csvs.append(garage_tf_csv)
            garage_pytorch_csvs.append(garage_pytorch_csv)
        env.close()
        benchmark_helper.plot_average_over_trials(
            [garage_tf_csvs, garage_pytorch_csvs],
            ['Evaluation/AverageReturn'] * 2,
            plt_file=plt_file,
            env_id=env_id,
            x_label='Iteration',
            y_label='AverageReturn',
            names=['garage-TensorFlow', 'garage-PyTorch'],
        )
        result_json[env_id] = benchmark_helper.create_json(
            [garage_tf_csvs, garage_pytorch_csvs],
            seeds=seeds,
            trials=num_trials,
            xs=['Evaluation/Iteration'] * 2,
            ys=['Evaluation/AverageReturn'] * 2,
            factors=[hyper_parameters['batch_size']] * 2,
            names=['garage-TF', 'garage-PT'])
    Rh.write_file(result_json, 'TRPO')
def test_benchmark_trpo(self):
    """Compare TRPO benchmarks between garage and baselines.

    For each MuJoCo1M task, runs both implementations for the task's
    configured number of trials (one random seed per trial) and plots
    the resulting learning curves side by side.

    :return: None
    """
    mujoco1m = benchmarks.get_benchmark("Mujoco1M")
    timestamp = datetime.datetime.now().strftime("%Y-%m-%d-%H-%M-%S-%f")
    benchmark_dir = "./data/local/benchmarks/trpo/%s/" % timestamp
    for task in mujoco1m["tasks"]:
        env_id = task["env_id"]
        env = gym.make(env_id)
        # baselines needs path-length capping; garage handles it itself.
        baseline_env = AutoStopEnv(env_name=env_id, max_path_length=100)
        seeds = random.sample(range(100), task["trials"])
        task_dir = osp.join(benchmark_dir, env_id)
        plt_file = osp.join(benchmark_dir,
                            "{}_benchmark.png".format(env_id))
        baselines_csvs = []
        garage_csvs = []
        for trial in range(task["trials"]):
            _PLACEHOLDER_CACHE.clear()
            seed = seeds[trial]
            trial_dir = task_dir + "/trial_%d_seed_%d" % (trial + 1, seed)
            garage_dir = trial_dir + "/garage"
            baselines_dir = trial_dir + "/baselines"
            # A fresh graph isolates each trial's TF state.
            with tf.Graph().as_default():
                # Run garage algorithms
                env.reset()
                garage_csv = run_garage(env, seed, garage_dir)
                # Run baseline algorithms
                baseline_env.reset()
                baselines_csv = run_baselines(baseline_env, seed,
                                              baselines_dir)
            garage_csvs.append(garage_csv)
            baselines_csvs.append(baselines_csv)
        plot(b_csvs=baselines_csvs,
             g_csvs=garage_csvs,
             g_x="Iteration",
             g_y="EpisodeRewardMean",
             b_x="EpThisIter",
             b_y="EpRewMean",
             trials=task["trials"],
             seeds=seeds,
             plt_file=plt_file,
             env_id=env_id)
        env.close()
        # Fix: baseline_env was previously leaked (never closed).
        baseline_env.close()
def test_benchmark_trpo(self):
    """Compare TRPO benchmarks between garage and baselines.

    For each MuJoCo1M task, runs both implementations for the task's
    configured number of trials (one random seed per trial) and plots
    the resulting learning curves side by side.

    :return: None
    """
    mujoco1m = benchmarks.get_benchmark("Mujoco1M")
    timestamp = datetime.datetime.now().strftime("%Y-%m-%d-%H-%M-%S-%f")
    benchmark_dir = "./benchmark_trpo/%s/" % timestamp
    for task in mujoco1m["tasks"]:
        env_id = task["env_id"]
        env = gym.make(env_id)
        seeds = random.sample(range(100), task["trials"])
        task_dir = osp.join(benchmark_dir, env_id)
        plt_file = osp.join(benchmark_dir,
                            "{}_benchmark.png".format(env_id))
        baselines_csvs = []
        garage_csvs = []
        # Fix: loop variable renamed trail -> trial.  The on-disk path
        # and the plot() keyword keep the historical spellings so that
        # output locations and the external API stay unchanged.
        for trial in range(task["trials"]):
            _PLACEHOLDER_CACHE.clear()
            env.reset()
            seed = seeds[trial]
            trial_dir = task_dir + "/trail_%d_seed_%d" % (trial + 1, seed)
            garage_dir = trial_dir + "/garage"
            baselines_dir = trial_dir + "/baselines"
            # baselines builds its own env from env_id here.
            baselines_csv = run_baselines(env_id, seed, baselines_dir)
            # Run garage algorithms
            env.reset()
            garage_csv = run_garage(env, seed, garage_dir)
            garage_csvs.append(garage_csv)
            baselines_csvs.append(baselines_csv)
        plot(b_csvs=baselines_csvs,
             g_csvs=garage_csvs,
             g_x="Iteration",
             g_y="AverageReturn",
             b_x="Iter",
             b_y="EpRewMean",
             trails=task["trials"],
             seeds=seeds,
             plt_file=plt_file,
             env_id=env_id)
        # Fix: env was previously leaked (never closed).
        env.close()
def test_benchmark_trpo(self):
    """Compare TRPO benchmarks between garage and baselines.

    For each MuJoCo1M task, runs both implementations for the task's
    configured number of trials (one random seed per trial), plots the
    learning curves, and records a JSON summary for all tasks.

    :return: None
    """
    mujoco1m = benchmarks.get_benchmark('Mujoco1M')
    timestamp = datetime.datetime.now().strftime('%Y-%m-%d-%H-%M-%S-%f')
    benchmark_dir = './data/local/benchmarks/trpo/%s/' % timestamp
    result_json = {}
    for task in mujoco1m['tasks']:
        env_id = task['env_id']
        env = gym.make(env_id)
        # baselines needs path-length capping; garage handles it itself.
        baseline_env = AutoStopEnv(env_name=env_id, max_path_length=100)
        seeds = random.sample(range(100), task['trials'])
        task_dir = osp.join(benchmark_dir, env_id)
        plt_file = osp.join(benchmark_dir,
                            '{}_benchmark.png'.format(env_id))
        baselines_csvs = []
        garage_csvs = []
        for trial in range(task['trials']):
            _PLACEHOLDER_CACHE.clear()
            seed = seeds[trial]
            trial_dir = task_dir + '/trial_%d_seed_%d' % (trial + 1, seed)
            garage_dir = trial_dir + '/garage'
            baselines_dir = trial_dir + '/baselines'
            # A fresh graph isolates each trial's TF state.
            with tf.Graph().as_default():
                # Run garage algorithms
                env.reset()
                garage_csv = run_garage(env, seed, garage_dir)
                # Run baseline algorithms
                baseline_env.reset()
                baselines_csv = run_baselines(baseline_env, seed,
                                              baselines_dir)
            garage_csvs.append(garage_csv)
            baselines_csvs.append(baselines_csv)
        Rh.plot(b_csvs=baselines_csvs,
                g_csvs=garage_csvs,
                g_x='Iteration',
                g_y='AverageReturn',
                b_x='EpThisIter',
                b_y='EpRewMean',
                trials=task['trials'],
                seeds=seeds,
                plt_file=plt_file,
                env_id=env_id,
                x_label='Iteration',
                y_label='AverageReturn')
        # NOTE: 'trails' is the keyword spelling Rh.create_json expects.
        result_json[env_id] = Rh.create_json(b_csvs=baselines_csvs,
                                             g_csvs=garage_csvs,
                                             seeds=seeds,
                                             trails=task['trials'],
                                             g_x='Iteration',
                                             g_y='AverageReturn',
                                             b_x='TimestepsSoFar',
                                             b_y='EpRewMean',
                                             factor_g=1024,
                                             factor_b=1)
        env.close()
        # Fix: baseline_env was previously leaked (never closed).
        baseline_env.close()
    Rh.write_file(result_json, 'TRPO')