def test_benchmark_categorical_cnn_policy(self):
    """Benchmark CategoricalCNNPolicy with PPO on pixel environments."""
    timestamp = datetime.datetime.now().strftime('%Y-%m-%d-%H-%M-%S-%f')
    benchmark_dir = './data/local/benchmarks/ppo/%s/' % timestamp
    for env_id in ['CubeCrash-v0', 'MemorizeDigits-v0']:
        env = gym.make(env_id)
        seeds = random.sample(range(100), num_of_trials)
        task_dir = osp.join(benchmark_dir, env_id)
        plt_file = osp.join(benchmark_dir,
                            '{}_benchmark_ppo.png'.format(env_id))
        relplt_file = osp.join(benchmark_dir,
                               '{}_benchmark_ppo_mean.png'.format(env_id))
        metarl_csvs = []
        metarl_model_csvs = []

        for trial in range(num_of_trials):
            seed = seeds[trial]
            trial_dir = task_dir + '/trial_%d_seed_%d' % (trial + 1, seed)
            metarl_dir = trial_dir + '/metarl'

            with tf.Graph().as_default():
                # Run metarl algorithms
                env.reset()
                metarl_csv = run_metarl(env, seed, metarl_dir)
            metarl_csvs.append(metarl_csv)

        env.close()

        Rh.relplot(g_csvs=metarl_csvs,
                   b_csvs=metarl_model_csvs,
                   g_x='Iteration',
                   g_y='Evaluation/AverageReturn',
                   g_z='MetaRL',
                   b_x='Iteration',
                   b_y='Evaluation/AverageReturn',
                   b_z='MetaRLWithModel',
                   trials=num_of_trials,
                   seeds=seeds,
                   plt_file=relplt_file,
                   env_id=env_id,
                   x_label='Iteration',
                   y_label='Evaluation/AverageReturn')
        Rh.plot(g_csvs=metarl_csvs,
                b_csvs=metarl_model_csvs,
                g_x='Iteration',
                g_y='Evaluation/AverageReturn',
                g_z='MetaRL',
                b_x='Iteration',
                b_y='Evaluation/AverageReturn',
                b_z='MetaRLWithModel',
                trials=num_of_trials,
                seeds=seeds,
                plt_file=plt_file,
                env_id=env_id,
                x_label='Iteration',
                y_label='Evaluation/AverageReturn')

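# The benchmarks in this module rely on two pieces defined elsewhere in each
# benchmark file: `num_of_trials`, the number of seeds run per task, and the
# `run_metarl(env, seed, log_dir)` / `run_garage(env, seed, log_dir)` helpers
# (signatures vary slightly per file), which seed the experiment, train the
# algorithm while logging under `log_dir`, and return the path to the
# progress CSV that `Rh.plot`/`Rh.relplot` consume. A minimal, illustrative
# sketch of that contract (names and the default value are assumptions, not
# the actual implementation):
#
#     num_of_trials = 3
#
#     def run_metarl(env, seed, log_dir):
#         """Train on `env` with `seed`, logging progress under `log_dir`.
#
#         Returns:
#             str: Path to the CSV file containing training progress.
#         """
#         deterministic.set_seed(seed)
#         ...  # build policy/algo, run the trainer, write progress.csv
#         return osp.join(log_dir, 'progress.csv')
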
def benchmark_continuous_mlp_q_function(self):  # pylint: disable=no-self-use
    """Test Continuous MLP QFunction Benchmarking."""
    mujoco1m = benchmarks.get_benchmark('Mujoco1M')

    timestamp = datetime.datetime.now().strftime('%Y-%m-%d-%H-%M-%S-%f')
    benchmark_dir = osp.join(os.getcwd(), 'data', 'local', 'benchmarks',
                             'continuous_mlp_q_function', timestamp)

    for task in mujoco1m['tasks']:
        env_id = task['env_id']
        env = gym.make(env_id)
        seeds = random.sample(range(100), num_of_trials)
        task_dir = osp.join(benchmark_dir, env_id)
        plt_file = osp.join(
            benchmark_dir,
            '{}_benchmark_continuous_mlp_q_function.png'.format(env_id))
        garage_csvs = []

        for trial in range(num_of_trials):
            seed = seeds[trial]
            trial_dir = task_dir + '/trial_%d_seed_%d' % (trial + 1, seed)
            garage_dir = trial_dir + '/garage'

            with tf.Graph().as_default():
                env.reset()
                garage_csv = run_garage(env, seed, garage_dir)
            garage_csvs.append(garage_csv)

        env.close()

        Rh.relplot(g_csvs=garage_csvs,
                   b_csvs=[],
                   g_x='Epoch',
                   g_y='Evaluation/AverageReturn',
                   g_z='Garage',
                   b_x=None,
                   b_y=None,
                   b_z=None,
                   trials=num_of_trials,
                   seeds=seeds,
                   plt_file=plt_file,
                   env_id=env_id,
                   x_label='Iteration',
                   y_label='Evaluation/AverageReturn')

def test_benchmark_continuous_mlp_policy(self):
    """Benchmark ContinuousMLPPolicy on the Mujoco1M tasks."""
    mujoco1m = benchmarks.get_benchmark('Mujoco1M')

    timestamp = datetime.datetime.now().strftime('%Y-%m-%d-%H-%M-%S-%f')
    benchmark_dir = osp.join(os.getcwd(), 'data', 'local', 'benchmarks',
                             'continuous_mlp_policy', timestamp)

    for task in mujoco1m['tasks']:
        env_id = task['env_id']
        env = gym.make(env_id)
        seeds = random.sample(range(100), num_of_trials)
        task_dir = osp.join(benchmark_dir, env_id)
        plt_file = osp.join(
            benchmark_dir,
            '{}_benchmark_continuous_mlp_policy.png'.format(env_id))
        metarl_csvs = []

        for trial in range(num_of_trials):
            seed = seeds[trial]
            trial_dir = task_dir + '/trial_%d_seed_%d' % (trial + 1, seed)
            metarl_dir = trial_dir + '/metarl'

            with tf.Graph().as_default():
                env.reset()
                metarl_csv = run_metarl(env, seed, metarl_dir)
            metarl_csvs.append(metarl_csv)

        env.close()

        Rh.relplot(g_csvs=metarl_csvs,
                   b_csvs=[],
                   g_x='Epoch',
                   g_y='Evaluation/AverageReturn',
                   g_z='MetaRL',
                   b_x=None,
                   b_y=None,
                   b_z=None,
                   trials=num_of_trials,
                   seeds=seeds,
                   plt_file=plt_file,
                   env_id=env_id,
                   x_label='Iteration',
                   y_label='Evaluation/AverageReturn')

def test_benchmark_gaussian_cnn_baseline(self):
    """Benchmark GaussianCNNBaseline with PPO on pixel environments."""
    timestamp = datetime.datetime.now().strftime('%Y-%m-%d-%H-%M-%S-%f')
    benchmark_dir = './data/local/benchmarks/ppo/%s/' % timestamp
    for env_id in ['CubeCrash-v0', 'MemorizeDigits-v0']:
        env = gym.make(env_id)
        seeds = random.sample(range(100), num_of_trials)
        task_dir = osp.join(benchmark_dir, env_id)
        plt_file = osp.join(
            benchmark_dir,
            '{}_benchmark_gaussian_cnn_baseline.png'.format(env_id))
        garage_csvs = []

        for trial in range(num_of_trials):
            seed = seeds[trial]
            trial_dir = task_dir + '/trial_%d_seed_%d' % (trial + 1, seed)
            garage_dir = trial_dir + '/garage'

            with tf.Graph().as_default():
                # Run garage algorithms
                env.reset()
                garage_csv = run_garage(env, seed, garage_dir)
            garage_csvs.append(garage_csv)

        env.close()

        Rh.relplot(g_csvs=garage_csvs,
                   b_csvs=[],
                   g_x='Iteration',
                   g_y='Evaluation/AverageReturn',
                   g_z='Garage',
                   b_x='Iteration',
                   b_y='Evaluation/AverageReturn',
                   b_z='GarageWithModel',
                   trials=num_of_trials,
                   seeds=seeds,
                   plt_file=plt_file,
                   env_id=env_id,
                   x_label='Iteration',
                   y_label='Evaluation/AverageReturn')

def test_benchmark_rl2(self):  # pylint: disable=no-self-use
    """Benchmark RL2 on the ML45 Metaworld task set."""
    # The test set has a higher max_obs_dim, so pad observations to match it.
    env_obs_dim = [
        env().observation_space.shape[0]
        for (_, env) in ML45_ENVS['test'].items()
    ]
    max_obs_dim = max(env_obs_dim)

    env_id = 'ML45'
    ML_train_envs = [
        TaskIdWrapper(NormalizedRewardEnv(
            RL2Env(env(*ML45_ARGS['train'][task]['args'],
                       **ML45_ARGS['train'][task]['kwargs']),
                   max_obs_dim)),
                      task_id=task_id,
                      task_name=task)
        for (task_id, (task, env)) in enumerate(ML45_ENVS['train'].items())
    ]
    tasks = task_sampler.EnvPoolSampler(ML_train_envs)
    tasks.grow_pool(hyper_parameters['meta_batch_size'])
    envs = tasks.sample(hyper_parameters['meta_batch_size'])
    env = envs[0]()

    timestamp = datetime.datetime.now().strftime('%Y-%m-%d-%H-%M-%S-%f')
    benchmark_dir = './data/local/benchmarks/rl2/%s/' % timestamp
    result_json = {}

    # Start main loop
    seeds = random.sample(range(100), hyper_parameters['n_trials'])
    task_dir = osp.join(benchmark_dir, env_id)
    metarl_tf_csvs = []

    for trial in range(hyper_parameters['n_trials']):
        seed = seeds[trial]
        trial_dir = task_dir + '/trial_%d_seed_%d' % (trial + 1, seed)
        metarl_tf_dir = trial_dir + '/metarl'

        with tf.Graph().as_default():
            env.reset()
            metarl_tf_csv = run_metarl(env, envs, tasks, seed, metarl_tf_dir)
        metarl_tf_csvs.append(metarl_tf_csv)

        with open(osp.join(metarl_tf_dir, 'parameters.txt'), 'w') as outfile:
            hyper_parameters_copy = copy.deepcopy(hyper_parameters)
            hyper_parameters_copy['sampler_cls'] = str(
                hyper_parameters_copy['sampler_cls'])
            json.dump(hyper_parameters_copy, outfile)

    g_x = 'TotalEnvSteps'
    g_ys = [
        'Evaluation/AverageReturn',
        'Evaluation/SuccessRate',
    ]

    for g_y in g_ys:
        plt_file = osp.join(
            benchmark_dir,
            '{}_benchmark_{}.png'.format(env_id, g_y.replace('/', '-')))
        Rh.relplot(g_csvs=metarl_tf_csvs,
                   b_csvs=None,
                   g_x=g_x,
                   g_y=g_y,
                   g_z='MetaRL',
                   b_x=None,
                   b_y=None,
                   b_z='ProMP',
                   trials=hyper_parameters['n_trials'],
                   seeds=seeds,
                   plt_file=plt_file,
                   env_id=env_id,
                   x_label=g_x,
                   y_label=g_y)

def benchmark_categorical_lstm_policy(self):
    categorical_tasks = [
        'LunarLander-v2', 'Assault-ramDeterministic-v4',
        'Breakout-ramDeterministic-v4',
        'ChopperCommand-ramDeterministic-v4', 'Tutankham-ramDeterministic-v4'
    ]
    timestamp = datetime.datetime.now().strftime('%Y-%m-%d-%H-%M-%S-%f')
    benchmark_dir = './data/local/benchmarks/ppo_categ/%s/' % timestamp
    result_json = {}
    for task in categorical_tasks:
        env_id = task
        env = gym.make(env_id)
        # baseline_env = AutoStopEnv(env_name=env_id, max_path_length=100)
        seeds = random.sample(range(100), 3)
        task_dir = osp.join(benchmark_dir, env_id)
        plt_file = osp.join(benchmark_dir, '{}_benchmark.png'.format(env_id))
        mean_plt_file = osp.join(benchmark_dir,
                                 '{}_benchmark_mean.png'.format(env_id))
        garage_models_csvs = []
        garage_csvs = []

        for trial in range(3):
            seed = seeds[trial]
            trial_dir = task_dir + '/trial_%d_seed_%d' % (trial + 1, seed)
            garage_dir = trial_dir + '/garage'

            with tf.Graph().as_default():
                # Run baselines algorithms
                # baseline_env.reset()
                # baselines_csv = run_baselines(baseline_env, seed,
                #                               baselines_dir)

                # Run garage algorithms
                env.reset()
                garage_csv = run_garage(env, seed, garage_dir)
                env.reset()
            garage_csvs.append(garage_csv)

        env.close()

        Rh.plot(b_csvs=garage_models_csvs,
                g_csvs=garage_csvs,
                g_x='Iteration',
                g_y='Evaluation/AverageReturn',
                g_z='garage',
                b_x='Iteration',
                b_y='Evaluation/AverageReturn',
                b_z='garage_model',
                trials=3,
                seeds=seeds,
                plt_file=plt_file,
                env_id=env_id,
                x_label='Iteration',
                y_label='Evaluation/AverageReturn')
        Rh.relplot(b_csvs=garage_models_csvs,
                   g_csvs=garage_csvs,
                   g_x='Iteration',
                   g_y='Evaluation/AverageReturn',
                   g_z='garage',
                   b_x='Iteration',
                   b_y='Evaluation/AverageReturn',
                   b_z='garage_model',
                   trials=3,
                   seeds=seeds,
                   plt_file=mean_plt_file,
                   env_id=env_id,
                   x_label='Iteration',
                   y_label='Evaluation/AverageReturn')
        result_json[env_id] = Rh.create_json(
            b_csvs=garage_models_csvs,
            g_csvs=garage_csvs,
            seeds=seeds,
            trails=3,
            g_x='Iteration',
            g_y='Evaluation/AverageReturn',
            b_x='Iteration',
            b_y='Evaluation/AverageReturn',
            factor_g=2048,
            factor_b=2048)

    Rh.write_file(result_json, 'PPO')

def test_benchmark_ddpg(self):
    """Compare benchmarks between metarl and baselines."""
    # Load Mujoco1M tasks; other benchmarks are listed at
    # https://github.com/openai/baselines/blob/master/baselines/bench/benchmarks.py
    mujoco1m = benchmarks.get_benchmark('Mujoco1M')

    timestamp = datetime.datetime.now().strftime('%Y-%m-%d-%H-%M-%S-%f')
    benchmark_dir = osp.join(os.getcwd(), 'data', 'local', 'benchmarks',
                             'ddpg', timestamp)
    result_json = {}
    for task in mujoco1m['tasks']:
        env_id = task['env_id']
        env = gym.make(env_id)
        baseline_env = AutoStopEnv(env_name=env_id,
                                   max_path_length=params['n_rollout_steps'])
        seeds = random.sample(range(100), task['trials'])

        task_dir = osp.join(benchmark_dir, env_id)
        plt_file = osp.join(benchmark_dir, '{}_benchmark.png'.format(env_id))
        relplt_file = osp.join(benchmark_dir,
                               '{}_benchmark_mean.png'.format(env_id))
        baselines_csvs = []
        metarl_csvs = []

        for trial in range(task['trials']):
            env.reset()
            baseline_env.reset()
            seed = seeds[trial]

            trial_dir = osp.join(task_dir,
                                 'trial_{}_seed_{}'.format(trial + 1, seed))
            metarl_dir = osp.join(trial_dir, 'metarl')
            baselines_dir = osp.join(trial_dir, 'baselines')

            with tf.Graph().as_default():
                # Run metarl algorithms
                metarl_csv = run_metarl(env, seed, metarl_dir)

                # Run baselines algorithms
                baselines_csv = run_baselines(baseline_env, seed,
                                              baselines_dir)

            metarl_csvs.append(metarl_csv)
            baselines_csvs.append(baselines_csv)

        env.close()

        Rh.plot(b_csvs=baselines_csvs,
                g_csvs=metarl_csvs,
                g_x='Epoch',
                g_y='Evaluation/AverageReturn',
                g_z='MetaRL',
                b_x='total/epochs',
                b_y='rollout/return',
                b_z='Baseline',
                trials=task['trials'],
                seeds=seeds,
                plt_file=plt_file,
                env_id=env_id,
                x_label='Epoch',
                y_label='Evaluation/AverageReturn')
        Rh.relplot(g_csvs=metarl_csvs,
                   b_csvs=baselines_csvs,
                   g_x='Epoch',
                   g_y='Evaluation/AverageReturn',
                   g_z='MetaRL',
                   b_x='total/epochs',
                   b_y='rollout/return',
                   b_z='Baseline',
                   trials=task['trials'],
                   seeds=seeds,
                   plt_file=relplt_file,
                   env_id=env_id,
                   x_label='Epoch',
                   y_label='Evaluation/AverageReturn')
        result_json[env_id] = Rh.create_json(
            b_csvs=baselines_csvs,
            g_csvs=metarl_csvs,
            seeds=seeds,
            trails=task['trials'],
            g_x='Epoch',
            g_y='Evaluation/AverageReturn',
            b_x='total/epochs',
            b_y='rollout/return',
            factor_g=params['steps_per_epoch'] * params['n_rollout_steps'],
            factor_b=1)

    Rh.write_file(result_json, 'DDPG')

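# Note on `factor_g` / `factor_b` above (an interpretation, not taken from the
# helper's documentation): the two logs use different x-axis units, so each
# logged index is rescaled before the curves are compared. MetaRL logs once
# per epoch and collects `steps_per_epoch * n_rollout_steps` transitions per
# epoch, while the baselines log is kept as-is (factor 1). A minimal
# illustrative sketch of such a rescaling:
#
#     def to_env_steps(x_values, factor):
#         """Convert logged epochs/iterations to environment steps."""
#         return [x * factor for x in x_values]
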
def test_benchmark_rl2(self):  # pylint: disable=no-self-use
    """Run the ProMP baseline on the RL2 benchmark environments."""
    if ML:
        if env_ind == 2:
            envs = [ML1.get_train_tasks('push-v1')]
            env_ids = ['ML1-push-v1']
        elif env_ind == 3:
            envs = [ML1.get_train_tasks('reach-v1')]
            env_ids = ['ML1-reach-v1']
        elif env_ind == 4:
            envs = [ML1.get_train_tasks('pick-place-v1')]
            env_ids = ['ML1-pick-place-v1']
        else:
            raise ValueError('Env index is wrong')
    else:
        if env_ind == 0:
            envs = [HalfCheetahVelEnv]
            env_ids = ['HalfCheetahVelEnv']
        elif env_ind == 1:
            envs = [HalfCheetahDirEnv]
            env_ids = ['HalfCheetahDirEnv']
        else:
            raise ValueError('Env index is wrong')

    timestamp = datetime.datetime.now().strftime('%Y-%m-%d-%H-%M-%S-%f')
    benchmark_dir = './data/local/benchmarks/rl2/%s/' % timestamp
    result_json = {}

    for i, env in enumerate(envs):
        seeds = random.sample(range(100), hyper_parameters['n_trials'])
        task_dir = osp.join(benchmark_dir, env_ids[i])
        plt_file = osp.join(benchmark_dir,
                            '{}_benchmark.png'.format(env_ids[i]))
        metarl_tf_csvs = []
        promp_csvs = []

        for trial in range(hyper_parameters['n_trials']):
            seed = seeds[trial]
            trial_dir = task_dir + '/trial_%d_seed_%d' % (trial + 1, seed)
            promp_dir = trial_dir + '/promp'

            with tf.Graph().as_default():
                # ML1 entries are environment instances, the HalfCheetah
                # entries are environment classes.
                if isinstance(env, gym.Env):
                    env.reset()
                    promp_csv = run_promp(env, seed, promp_dir)
                else:
                    promp_csv = run_promp(env(), seed, promp_dir)
            promp_csvs.append(promp_csv)

            with open(osp.join(promp_dir, 'parameters.txt'), 'w') as outfile:
                json.dump(hyper_parameters, outfile)

        if isinstance(env, gym.Env):
            env.close()

        p_x = 'n_timesteps'
        if ML:
            p_ys = ['train-AverageReturn', 'train-SuccessRate']
        else:
            p_ys = ['train-AverageReturn']

        for p_y in p_ys:
            plt_file = osp.join(
                benchmark_dir,
                '{}_benchmark_promp_{}.png'.format(env_ids[i],
                                                   p_y.replace('/', '-')))
            Rh.relplot(g_csvs=promp_csvs,
                       b_csvs=None,
                       g_x=p_x,
                       g_y=p_y,
                       g_z='ProMP',
                       b_x=None,
                       b_y=None,
                       b_z='None',
                       trials=hyper_parameters['n_trials'],
                       seeds=seeds,
                       plt_file=plt_file,
                       env_id=env_ids[i])

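# The branching on `isinstance(env, gym.Env)` above exists because the
# Metaworld ML1 entries are already-constructed environments while the
# HalfCheetah entries are environment classes. A small helper that would
# normalize the two cases (illustrative sketch only; `make_env` is not part
# of this file):
#
#     def make_env(env_or_cls):
#         """Return an env instance from either an instance or a class."""
#         if isinstance(env_or_cls, gym.Env):
#             return env_or_cls
#         return env_or_cls()
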
def test_benchmark_rl2(self):  # pylint: disable=no-self-use
    """Run MetaRL RL2 on the benchmark environments."""
    if ML:
        if env_ind == 2:
            envs = [ML1.get_train_tasks('push-v1')]
            env_ids = ['ML1-push-v1']
        elif env_ind == 3:
            envs = [ML1.get_train_tasks('reach-v1')]
            env_ids = ['ML1-reach-v1']
        elif env_ind == 4:
            envs = [ML1.get_train_tasks('pick-place-v1')]
            env_ids = ['ML1-pick-place-v1']
        else:
            raise ValueError('Env index is wrong')
    else:
        if env_ind == 0:
            envs = [HalfCheetahVelEnv]
            env_ids = ['HalfCheetahVelEnv']
        elif env_ind == 1:
            envs = [HalfCheetahDirEnv]
            env_ids = ['HalfCheetahDirEnv']
        else:
            raise ValueError('Env index is wrong')

    timestamp = datetime.datetime.now().strftime('%Y-%m-%d-%H-%M-%S-%f')
    benchmark_dir = './data/local/benchmarks/rl2/%s/' % timestamp
    result_json = {}

    for i, env in enumerate(envs):
        seeds = random.sample(range(100), hyper_parameters['n_trials'])
        task_dir = osp.join(benchmark_dir, env_ids[i])
        plt_file = osp.join(benchmark_dir,
                            '{}_benchmark.png'.format(env_ids[i]))
        metarl_tf_csvs = []
        promp_csvs = []

        for trial in range(hyper_parameters['n_trials']):
            seed = seeds[trial]
            trial_dir = task_dir + '/trial_%d_seed_%d' % (trial + 1, seed)
            metarl_tf_dir = trial_dir + '/metarl'
            promp_dir = trial_dir + '/promp'

            with tf.Graph().as_default():
                metarl_tf_csv = run_metarl(env, seed, metarl_tf_dir)
            metarl_tf_csvs.append(metarl_tf_csv)

            with open(osp.join(metarl_tf_dir, 'parameters.txt'),
                      'w') as outfile:
                hyper_parameters_copy = copy.deepcopy(hyper_parameters)
                hyper_parameters_copy['sampler_cls'] = str(
                    hyper_parameters_copy['sampler_cls'])
                json.dump(hyper_parameters_copy, outfile)

        g_x = 'TotalEnvSteps'
        if ML:
            g_ys = [
                'Evaluation/AverageReturn',
                'Evaluation/SuccessRate',
            ]
        else:
            g_ys = [
                'Evaluation/AverageReturn',
            ]

        for g_y in g_ys:
            plt_file = osp.join(
                benchmark_dir,
                '{}_benchmark_rl2_{}.png'.format(env_ids[i],
                                                 g_y.replace('/', '-')))
            Rh.relplot(g_csvs=metarl_tf_csvs,
                       b_csvs=None,
                       g_x=g_x,
                       g_y=g_y,
                       g_z='MetaRL',
                       b_x=None,
                       b_y=None,
                       b_z=None,
                       trials=hyper_parameters['n_trials'],
                       seeds=seeds,
                       plt_file=plt_file,
                       env_id=env_ids[i],
                       x_label=g_x,
                       y_label=g_y)

def test_benchmark_categorical_mlp_policy(self):
    """Benchmark CategoricalMLPPolicy with PPO on discrete-action tasks."""
    categorical_tasks = [
        'LunarLander-v2', 'CartPole-v1', 'Assault-ramDeterministic-v4',
        'Breakout-ramDeterministic-v4',
        'ChopperCommand-ramDeterministic-v4', 'Tutankham-ramDeterministic-v4'
    ]
    timestamp = datetime.datetime.now().strftime('%Y-%m-%d-%H-%M-%S-%f')
    benchmark_dir = './data/local/benchmarks/categorical_mlp_policy/{0}/'
    benchmark_dir = benchmark_dir.format(timestamp)
    result_json = {}
    for task in categorical_tasks:
        env_id = task
        env = gym.make(env_id)
        trials = 3
        seeds = random.sample(range(100), trials)
        task_dir = osp.join(benchmark_dir, env_id)
        plt_file = osp.join(benchmark_dir, '{}_benchmark.png'.format(env_id))
        relplt_file = osp.join(benchmark_dir,
                               '{}_benchmark_mean.png'.format(env_id))
        garage_csvs = []

        for trial in range(trials):
            seed = seeds[trial]
            trial_dir = task_dir + '/trial_%d_seed_%d' % (trial + 1, seed)
            garage_dir = trial_dir + '/garage'

            with tf.Graph().as_default():
                # Run garage algorithms
                env.reset()
                garage_csv = run_garage(env, seed, garage_dir)
            garage_csvs.append(garage_csv)

        env.close()

        Rh.plot(b_csvs=garage_csvs,
                g_csvs=garage_csvs,
                g_x='Iteration',
                g_y='AverageReturn',
                g_z='Garage',
                b_x='Iteration',
                b_y='AverageReturn',
                b_z='Garage',
                trials=trials,
                seeds=seeds,
                plt_file=plt_file,
                env_id=env_id,
                x_label='Iteration',
                y_label='AverageReturn')
        Rh.relplot(b_csvs=garage_csvs,
                   g_csvs=garage_csvs,
                   g_x='Iteration',
                   g_y='AverageReturn',
                   g_z='Garage',
                   b_x='Iteration',
                   b_y='AverageReturn',
                   b_z='Garage',
                   trials=trials,
                   seeds=seeds,
                   plt_file=relplt_file,
                   env_id=env_id,
                   x_label='Iteration',
                   y_label='AverageReturn')
        result_json[env_id] = Rh.create_json(b_csvs=garage_csvs,
                                             g_csvs=garage_csvs,
                                             seeds=seeds,
                                             trails=trials,
                                             g_x='Iteration',
                                             g_y='AverageReturn',
                                             b_x='Iteration',
                                             b_y='AverageReturn',
                                             factor_g=2048,
                                             factor_b=2048)

    Rh.write_file(result_json, 'PPO')