    def test_benchmark_categorical_cnn_policy(self):
        timestamp = datetime.datetime.now().strftime('%Y-%m-%d-%H-%M-%S-%f')
        benchmark_dir = './data/local/benchmarks/ppo/%s/' % timestamp
        for env_id in ['CubeCrash-v0', 'MemorizeDigits-v0']:
            env = gym.make(env_id)

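            # 'num_of_trials' is assumed to be a module-level constant giving
            # the number of seeded trials to run per environment.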
            seeds = random.sample(range(100), num_of_trials)

            task_dir = osp.join(benchmark_dir, env_id)
            plt_file = osp.join(benchmark_dir,
                                '{}_benchmark_ppo.png'.format(env_id))
            relplt_file = osp.join(benchmark_dir,
                                   '{}_benchmark_ppo_mean.png'.format(env_id))
            metarl_csvs = []
            metarl_model_csvs = []
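            # Note: metarl_model_csvs is never populated in this test; it is
            # passed below only to satisfy the two-series plotting interface.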

            for trial in range(num_of_trials):
                seed = seeds[trial]

                trial_dir = task_dir + '/trial_%d_seed_%d' % (trial + 1, seed)
                metarl_dir = trial_dir + '/metarl'

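                # A fresh TF graph per trial keeps variables from earlier
                # trials from colliding with the new run.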
                with tf.Graph().as_default():
                    # Run metarl algorithms
                    env.reset()
                    metarl_csv = run_metarl(env, seed, metarl_dir)

                metarl_csvs.append(metarl_csv)

            env.close()

            Rh.relplot(g_csvs=metarl_csvs,
                       b_csvs=metarl_model_csvs,
                       g_x='Iteration',
                       g_y='Evaluation/AverageReturn',
                       g_z='MetaRL',
                       b_x='Iteration',
                       b_y='Evaluation/AverageReturn',
                       b_z='MetaRLWithModel',
                       trials=num_of_trials,
                       seeds=seeds,
                       plt_file=relplt_file,
                       env_id=env_id,
                       x_label='Iteration',
                       y_label='Evaluation/AverageReturn')

            Rh.plot(g_csvs=metarl_csvs,
                    b_csvs=metarl_model_csvs,
                    g_x='Iteration',
                    g_y='Evaluation/AverageReturn',
                    g_z='MetaRL',
                    b_x='Iteration',
                    b_y='Evaluation/AverageReturn',
                    b_z='MetaRLWithModel',
                    trials=num_of_trials,
                    seeds=seeds,
                    plt_file=plt_file,
                    env_id=env_id,
                    x_label='Iteration',
                    y_label='Evaluation/AverageReturn')
Example #2
    def test_benchmark_ddpg(self):
        '''
        Compare benchmarks between garage and baselines.

        :return:
        '''
        # Load Mujoco1M tasks; other benchmarks are listed at
        # https://github.com/openai/baselines/blob/master/baselines/bench/benchmarks.py
        mujoco1m = benchmarks.get_benchmark('Mujoco1M')

        timestamp = datetime.datetime.now().strftime('%Y-%m-%d-%H-%M-%S-%f')
        benchmark_dir = osp.join(os.getcwd(), 'data', 'local', 'benchmarks',
                                 'ddpg', timestamp)
        result_json = {}
        for task in mujoco1m['tasks']:
            env_id = task['env_id']
            env = gym.make(env_id)
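            # AutoStopEnv is a benchmark helper that, as the name suggests,
            # stops episodes automatically after max_path_length steps.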
            baseline_env = AutoStopEnv(
                env_name=env_id, max_path_length=params['n_rollout_steps'])
            seeds = random.sample(range(100), task['trials'])

            task_dir = osp.join(benchmark_dir, env_id)
            plt_file = osp.join(benchmark_dir,
                                '{}_benchmark.png'.format(env_id))
            baselines_csvs = []
            garage_csvs = []

            for trial in range(task['trials']):
                env.reset()
                baseline_env.reset()
                seed = seeds[trial]

                trial_dir = osp.join(
                    task_dir, 'trial_{}_seed_{}'.format(trial + 1, seed))
                garage_dir = osp.join(trial_dir, 'garage')
                baselines_dir = osp.join(trial_dir, 'baselines')

                with tf.Graph().as_default():
                    # Run garage algorithms
                    garage_csv = run_garage(env, seed, garage_dir)

                    # Run baselines algorithms
                    baselines_csv = run_baselines(baseline_env, seed,
                                                  baselines_dir)

                garage_csvs.append(garage_csv)
                baselines_csvs.append(baselines_csv)

            env.close()

            Rh.plot(b_csvs=baselines_csvs,
                    g_csvs=garage_csvs,
                    g_x='Epoch',
                    g_y='AverageReturn',
                    b_x='total/epochs',
                    b_y='rollout/return',
                    trials=task['trials'],
                    seeds=seeds,
                    plt_file=plt_file,
                    env_id=env_id,
                    x_label='Epoch',
                    y_label='AverageReturn')

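            # The factors are assumed to rescale each library's x-axis to a
            # common unit: garage epochs become environment steps
            # (n_epoch_cycles * n_rollout_steps per epoch), while the
            # baselines column is left as-is.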
            result_json[env_id] = Rh.create_json(
                b_csvs=baselines_csvs,
                g_csvs=garage_csvs,
                seeds=seeds,
                trails=task['trials'],
                g_x='Epoch',
                g_y='AverageReturn',
                b_x='total/epochs',
                b_y='rollout/return',
                factor_g=params['n_epoch_cycles'] * params['n_rollout_steps'],
                factor_b=1)

        Rh.write_file(result_json, 'DDPG')
Example #3
    def test_benchmark_ppo(self):
        '''
        Compare benchmarks between garage and baselines.

        :return:
        '''
        mujoco1m = benchmarks.get_benchmark('Mujoco1M')
        timestamp = datetime.datetime.now().strftime('%Y-%m-%d-%H-%M-%S-%f')
        benchmark_dir = './data/local/benchmarks/ppo/%s/' % timestamp
        result_json = {}
        for task in mujoco1m['tasks']:
            env_id = task['env_id']

            env = gym.make(env_id)
            baseline_env = AutoStopEnv(env_name=env_id, max_path_length=100)

            seeds = random.sample(range(100), task['trials'])

            task_dir = osp.join(benchmark_dir, env_id)
            plt_file = osp.join(benchmark_dir,
                                '{}_benchmark.png'.format(env_id))
            baselines_csvs = []
            garage_csvs = []

            for trial in range(task['trials']):
                seed = seeds[trial]

                trial_dir = task_dir + '/trial_%d_seed_%d' % (trial + 1, seed)
                garage_dir = trial_dir + '/garage'
                baselines_dir = trial_dir + '/baselines'

                with tf.Graph().as_default():
                    # Run baselines algorithms
                    baseline_env.reset()
                    baselines_csv = run_baselines(baseline_env, seed,
                                                  baselines_dir)

                    # Run garage algorithms
                    env.reset()
                    garage_csv = run_garage(env, seed, garage_dir)

                garage_csvs.append(garage_csv)
                baselines_csvs.append(baselines_csv)

            env.close()

            Rh.plot(b_csvs=baselines_csvs,
                    g_csvs=garage_csvs,
                    g_x='Iteration',
                    g_y='AverageReturn',
                    b_x='nupdates',
                    b_y='eprewmean',
                    trials=task['trials'],
                    seeds=seeds,
                    plt_file=plt_file,
                    env_id=env_id,
                    x_label='Iteration',
                    y_label='AverageReturn')

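            # factor_g/factor_b are assumed to rescale iterations to
            # environment steps; 2048 is presumably the per-iteration batch
            # size used by both runs.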
            result_json[env_id] = Rh.create_json(b_csvs=baselines_csvs,
                                                 g_csvs=garage_csvs,
                                                 seeds=seeds,
                                                 trails=task['trials'],
                                                 g_x='Iteration',
                                                 g_y='AverageReturn',
                                                 b_x='nupdates',
                                                 b_y='eprewmean',
                                                 factor_g=2048,
                                                 factor_b=2048)

        Rh.write_file(result_json, 'PPO')
Example #4
    def test_benchmark_td3(self):
        """
        Test garage TD3 benchmarks.

        :return:
        """
        # Load Mujoco1M tasks; other benchmarks are listed at
        # https://github.com/openai/baselines/blob/master/baselines/bench/benchmarks.py # noqa: E501
        mujoco1m = benchmarks.get_benchmark('Mujoco1M')

        timestamp = datetime.datetime.now().strftime('%Y-%m-%d-%H-%M-%S-%f')
        benchmark_dir = osp.join(os.getcwd(), 'data', 'local', 'benchmarks',
                                 'td3', timestamp)
        result_json = {}

        # rlkit throws an error for 'Reacher-v2' due to a gym version mismatch
        mujoco1m['tasks'] = [
            task for task in mujoco1m['tasks']
            if task['env_id'] != 'Reacher-v2'
        ]

        for task in mujoco1m['tasks']:
            env_id = task['env_id']
            env = gym.make(env_id)
            rlkit_env = AutoStopEnv(env_name=env_id,
                                    max_path_length=params['n_rollout_steps'])
            seeds = random.sample(range(100), task['trials'])

            task_dir = osp.join(benchmark_dir, env_id)
            plt_file = osp.join(benchmark_dir,
                                '{}_benchmark.png'.format(env_id))
            garage_csvs = []
            rlkit_csvs = []

            for trial in range(task['trials']):
                env.reset()
                rlkit_env.reset()
                seed = seeds[trial]

                trial_dir = osp.join(
                    task_dir, 'trial_{}_seed_{}'.format(trial + 1, seed))
                garage_dir = osp.join(trial_dir, 'garage')
                rlkit_dir = osp.join(trial_dir, 'rlkit')

                with tf.Graph().as_default():
                    # Run rlkit algorithms
                    rlkit_csv = run_rlkit(rlkit_env, seed, rlkit_dir)

                    # Run garage algorithms
                    garage_csv = run_garage(env, seed, garage_dir)

                garage_csvs.append(garage_csv)
                rlkit_csvs.append(rlkit_csv)

            env.close()

            Rh.plot(b_csvs=rlkit_csvs,
                    g_csvs=garage_csvs,
                    g_x='Epoch',
                    g_y='Evaluation/AverageReturn',
                    g_z='garage',
                    b_x='Epoch',
                    b_y='evaluation/Average Returns',
                    b_z='rlkit',
                    trials=task['trials'],
                    seeds=seeds,
                    plt_file=plt_file,
                    env_id=env_id,
                    x_label='Epoch',
                    y_label='Evaluation/AverageReturn')

            result_json[env_id] = Rh.create_json(
                b_csvs=rlkit_csvs,
                g_csvs=garage_csvs,
                seeds=seeds,
                trails=task['trials'],
                g_x='Epoch',
                g_y='Evaluation/AverageReturn',
                b_x='Epoch',
                b_y='evaluation/Average Returns',
                factor_g=1,
                factor_b=1)

        Rh.write_file(result_json, 'TD3')
Example #5
def test_benchmark_categorical_gru_policy():
    """Benchmark categorical gru policy."""
    categorical_tasks = [
        'LunarLander-v2',
        'Assault-ramDeterministic-v4',
        'Breakout-ramDeterministic-v4',
        'ChopperCommand-ramDeterministic-v4',
        'Tutankham-ramDeterministic-v4',
        'CartPole-v1',
    ]
    timestamp = datetime.datetime.now().strftime('%Y-%m-%d-%H-%M-%S-%f')
    benchmark_dir = './data/local/benchmarks/ppo_categ_gru/%s/' % timestamp
    result_json = {}
    for task in categorical_tasks:
        env_id = task
        env = gym.make(env_id)
        # baseline_env = AutoStopEnv(env_name=env_id, max_path_length=100)

        seeds = random.sample(range(100), 3)

        task_dir = osp.join(benchmark_dir, env_id)
        plt_file = osp.join(benchmark_dir, '{}_benchmark.png'.format(env_id))
        baselines_csvs = []
        garage_csvs = []
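        # baselines_csvs stays empty: the baselines run below is commented
        # out, so the b-series passed to Rh.plot/Rh.create_json has no data.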

        for trial in range(3):
            seed = seeds[trial]

            trial_dir = task_dir + '/trial_%d_seed_%d' % (trial + 1, seed)
            garage_dir = trial_dir + '/garage'

            with tf.Graph().as_default():
                # Run baselines algorithms
                # baseline_env.reset()
                # baselines_csv = run_baselines(baseline_env, seed,
                #                               baselines_dir)

                # Run garage algorithms
                env.reset()
                garage_csv = run_garage(env, seed, garage_dir)

            garage_csvs.append(garage_csv)

        env.close()

        Rh.plot(b_csvs=baselines_csvs,
                g_csvs=garage_csvs,
                g_x='Iteration',
                g_y='AverageReturn',
                g_z='garage',
                b_x='Iteration',
                b_y='AverageReturn',
                b_z='baselines',
                trials=3,
                seeds=seeds,
                plt_file=plt_file,
                env_id=env_id,
                x_label='Iteration',
                y_label='AverageReturn')

        result_json[env_id] = Rh.create_json(b_csvs=baselines_csvs,
                                             g_csvs=garage_csvs,
                                             seeds=seeds,
                                             trails=3,
                                             g_x='Iteration',
                                             g_y='AverageReturn',
                                             b_x='Iteration',
                                             b_y='AverageReturn',
                                             factor_g=2048,
                                             factor_b=2048)

    Rh.write_file(result_json, 'PPO')
Example #6
    def test_benchmark_categorical_mlp_policy(self):
        '''
        Compare benchmarks between garage and baselines.

        :return:
        '''
        categorical_tasks = [
            'LunarLander-v2', 'CartPole-v1', 'Assault-ramDeterministic-v4',
            'Breakout-ramDeterministic-v4',
            'ChopperCommand-ramDeterministic-v4',
            'Tutankham-ramDeterministic-v4'
        ]
        timestamp = datetime.datetime.now().strftime('%Y-%m-%d-%H-%M-%S-%f')
        benchmark_dir = './data/local/benchmarks/categorical_mlp_policy/{0}/'
        benchmark_dir = benchmark_dir.format(timestamp)
        result_json = {}
        for task in categorical_tasks:
            env_id = task
            env = gym.make(env_id)
            trials = 3
            seeds = random.sample(range(100), trials)

            task_dir = osp.join(benchmark_dir, env_id)
            plt_file = osp.join(benchmark_dir,
                                '{}_benchmark.png'.format(env_id))
            relplt_file = osp.join(benchmark_dir,
                                   '{}_benchmark_mean.png'.format(env_id))
            garage_csvs = []

            for trial in range(trials):
                seed = seeds[trial]

                trial_dir = task_dir + '/trial_%d_seed_%d' % (trial + 1, seed)
                garage_dir = trial_dir + '/garage'

                with tf.Graph().as_default():
                    # Run garage algorithms
                    env.reset()
                    garage_csv = run_garage(env, seed, garage_dir)
                garage_csvs.append(garage_csv)

            env.close()

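            # There is no external baseline here, so the garage CSVs are
            # passed as both series and the two curves plot the same data.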
            Rh.plot(b_csvs=garage_csvs,
                    g_csvs=garage_csvs,
                    g_x='Iteration',
                    g_y='AverageReturn',
                    g_z='Garage',
                    b_x='Iteration',
                    b_y='AverageReturn',
                    b_z='Garage',
                    trials=trials,
                    seeds=seeds,
                    plt_file=plt_file,
                    env_id=env_id,
                    x_label='Iteration',
                    y_label='AverageReturn')

            Rh.relplot(b_csvs=garage_csvs,
                       g_csvs=garage_csvs,
                       g_x='Iteration',
                       g_y='AverageReturn',
                       g_z='Garage',
                       b_x='Iteration',
                       b_y='AverageReturn',
                       b_z='Garage',
                       trials=trials,
                       seeds=seeds,
                       plt_file=relplt_file,
                       env_id=env_id,
                       x_label='Iteration',
                       y_label='AverageReturn')

            result_json[env_id] = Rh.create_json(b_csvs=garage_csvs,
                                                 g_csvs=garage_csvs,
                                                 seeds=seeds,
                                                 trails=trials,
                                                 g_x='Iteration',
                                                 g_y='AverageReturn',
                                                 b_x='Iteration',
                                                 b_y='AverageReturn',
                                                 factor_g=2048,
                                                 factor_b=2048)

        Rh.write_file(result_json, 'PPO')
Example #7
    def test_benchmark_trpo(self):
        """
        Compare benchmarks between garage and baselines.

        :return:
        """
        mujoco1m = benchmarks.get_benchmark("Mujoco1M")

        timestamp = datetime.datetime.now().strftime("%Y-%m-%d-%H-%M-%S-%f")
        benchmark_dir = "./data/local/benchmarks/trpo/%s/" % timestamp
        result_json = {}
        for task in mujoco1m["tasks"]:
            env_id = task["env_id"]
            env = gym.make(env_id)
            baseline_env = AutoStopEnv(env_name=env_id, max_path_length=100)

            seeds = random.sample(range(100), task["trials"])

            task_dir = osp.join(benchmark_dir, env_id)
            plt_file = osp.join(benchmark_dir,
                                "{}_benchmark.png".format(env_id))
            baselines_csvs = []
            garage_csvs = []

            for trial in range(task["trials"]):
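                # Clear baselines' cached TF placeholders (kept in
                # baselines.common.tf_util) so references to the previous
                # trial's graph don't leak into this one.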
                _PLACEHOLDER_CACHE.clear()
                seed = seeds[trial]

                trial_dir = task_dir + "/trial_%d_seed_%d" % (trial + 1, seed)
                garage_dir = trial_dir + "/garage"
                baselines_dir = trial_dir + "/baselines"

                with tf.Graph().as_default():
                    # Run garage algorithms
                    env.reset()
                    garage_csv = run_garage(env, seed, garage_dir)

                    # Run baseline algorithms
                    baseline_env.reset()
                    baselines_csv = run_baselines(baseline_env, seed,
                                                  baselines_dir)

                garage_csvs.append(garage_csv)
                baselines_csvs.append(baselines_csv)

            Rh.plot(
                b_csvs=baselines_csvs,
                g_csvs=garage_csvs,
                g_x="Iteration",
                g_y="AverageReturn",
                b_x="EpThisIter",
                b_y="EpRewMean",
                trials=task["trials"],
                seeds=seeds,
                plt_file=plt_file,
                env_id=env_id,
                x_label="Iteration",
                y_label="AverageReturn")

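            # garage logs per-iteration values, so factor_g=1024 (assumed
            # batch size) converts iterations to steps; baselines'
            # 'TimestepsSoFar' is already in steps, hence factor_b=1.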
            result_json[env_id] = Rh.create_json(
                b_csvs=baselines_csvs,
                g_csvs=garage_csvs,
                seeds=seeds,
                trails=task["trials"],
                g_x="Iteration",
                g_y="AverageReturn",
                b_x="TimestepsSoFar",
                b_y="EpRewMean",
                factor_g=1024,
                factor_b=1)
            env.close()

        Rh.write_file(result_json, "TRPO")