Example #1
def test_max_seconds():
    benchmark = registration.Benchmark(
        id='TestBenchmark-v0',
        scorer=scoring.ClipTo01ThenAverage(num_episodes=2),
        tasks=[
            {
                'env_id': 'CartPole-v0',
                'trials': 1,
                'max_seconds': 1,
            },
        ])

    benchmark_result = _benchmark_result_helper(benchmark,
                                                data_sources=[0, 0],
                                                episode_lengths=[100, 100],
                                                episode_rewards=[0, 100],
                                                episode_types=['t', 't'],
                                                timestamps=[1.5, 2])
    assert _is_close(benchmark_result['scores'][0],
                     0.5), "benchmark_result={}".format(benchmark_result)

    # make sure we only include the first result because of wall clock time
    benchmark_result = _benchmark_result_helper(benchmark,
                                                data_sources=[0, 0],
                                                episode_lengths=[100, 100],
                                                episode_rewards=[0, 100],
                                                episode_types=['t', 't'],
                                                timestamps=[2, 100])
    assert _is_close(benchmark_result['scores'][0],
                     0.0), "benchmark_result={}".format(benchmark_result)
Example #2
def test_clip_average_max_timesteps():
    benchmark = registration.Benchmark(
        id='TestBenchmark-v0',
        scorer=scoring.ClipTo01ThenAverage(num_episodes=2),
        tasks=[
            {
                'env_id': 'CartPole-v0',
                'trials': 1,
                'max_timesteps': 2,
            },
        ])

    benchmark_result = _benchmark_result_helper(benchmark,
                                                data_sources=[0, 0],
                                                episode_lengths=[1, 1],
                                                episode_rewards=[1, 1],
                                                episode_types=['t', 't'],
                                                timestamps=[2, 3])
    _assert_benchmark_result(benchmark_result, score=0.01)

    # make sure we only include the first result because of timesteps
    benchmark_result = _benchmark_result_helper(benchmark,
                                                data_sources=[0, 0, 0],
                                                episode_lengths=[1, 100, 100],
                                                episode_rewards=[1, 100, 100],
                                                episode_types=['t', 't', 't'],
                                                timestamps=[2, 102, 202])
    _assert_benchmark_result(benchmark_result, score=0.005, solves=False)
Example #3
def test_max_timesteps():
    benchmark = registration.Benchmark(
        id='TestBenchmark-v0',
        scorer=scoring.ClipTo01ThenAverage(num_episodes=2),
        tasks=[
            {
                'env_id': 'CartPole-v0',
                'trials': 1,
                'max_timesteps': 2,
            },
        ])

    benchmark_result = _benchmark_result_helper(benchmark,
                                                data_sources=[0, 0],
                                                episode_lengths=[1, 1],
                                                episode_rewards=[1, 1],
                                                episode_types=['t', 't'],
                                                timestamps=[2, 3])
    assert _is_close(benchmark_result['scores'][0],
                     0.01), "benchmark_result={}".format(benchmark_result)

    # make sure we only include the first result because of timesteps
    benchmark_result = _benchmark_result_helper(benchmark,
                                                data_sources=[0, 0],
                                                episode_lengths=[1, 100],
                                                episode_rewards=[1, 100],
                                                episode_types=['t', 't'],
                                                timestamps=[2, 102])
    assert _is_close(benchmark_result['scores'][0],
                     0.005), "benchmark_result={}".format(benchmark_result)
    assert not np.any(benchmark_result['solves'][0]), \
        "benchmark_result={}".format(benchmark_result)
Example #4
def test():
    benchmark = registration.Benchmark(id='MyBenchmark-v0',
                                       scorer=scoring.ClipTo01ThenAverage(),
                                       tasks=[{
                                           'env_id': 'CartPole-v0',
                                           'trials': 1,
                                           'max_timesteps': 5
                                       }, {
                                           'env_id': 'CartPole-v0',
                                           'trials': 1,
                                           'max_timesteps': 100,
                                       }])

    with helpers.tempdir() as temp:
        env = gym.make('CartPole-v0')
        env = wrappers.Monitor(directory=temp, video_callable=False)(env)
        env.seed(0)

        env.set_monitor_mode('evaluation')
        rollout(env)

        env.set_monitor_mode('training')
        for i in range(2):
            rollout(env)

        env.set_monitor_mode('evaluation')
        rollout(env, good=True)

        env.close()
        results = monitoring.load_results(temp)
        evaluation_score = benchmark.score_evaluation(
            'CartPole-v0', results['data_sources'],
            results['initial_reset_timestamps'], results['episode_lengths'],
            results['episode_rewards'], results['episode_types'],
            results['timestamps'])
        benchmark_score = benchmark.score_benchmark({
            'CartPole-v0':
            evaluation_score['scores'],
        })

        assert np.all(
            np.isclose(evaluation_score['scores'], [0.0009, 0.0054])
        ), "evaluation_score={}".format(evaluation_score)
        assert np.isclose(
            benchmark_score,
            0.00315), "benchmark_score={}".format(benchmark_score)
Example #5
def test():
    benchmark = registration.Benchmark(id='MyBenchmark-v0',
                                       scorer=scoring.ClipTo01ThenAverage(),
                                       task_groups={
                                           'CartPole-v0': [{
                                               'seeds': 1,
                                               'timesteps': 5
                                           }, {
                                               'seeds': 1,
                                               'timesteps': 100
                                           }],
                                       })

    with helpers.tempdir() as temp:
        env = gym.make('CartPole-v0')
        env.monitor.start(temp, video_callable=False, seed=0)

        env.monitor.configure(mode='evaluation')
        rollout(env)

        env.monitor.configure(mode='training')
        for i in range(2):
            rollout(env)

        env.monitor.configure(mode='evaluation')
        rollout(env, good=True)

        env.monitor.close()
        results = monitoring.load_results(temp)
        evaluation_score = benchmark.score_evaluation(
            'CartPole-v0', results['episode_lengths'],
            results['episode_rewards'], results['episode_types'],
            results['timestamps'], results['initial_reset_timestamp'])
        benchmark_score = benchmark.score_benchmark({
            'CartPole-v0':
            evaluation_score['scores'],
        })

        assert np.all(
            np.isclose(evaluation_score['scores'], [0.0009, 0.0054])
        ), "evaluation_score={}".format(evaluation_score)
        assert np.isclose(
            benchmark_score,
            0.00315), "benchmark_score={}".format(benchmark_score)
Example #6
def test_clip_average_evaluation_scoring():
    benchmark = registration.Benchmark(
        id='TestBenchmark-v0',
        scorer=scoring.ClipTo01ThenAverage(num_episodes=1),
        tasks=[
            {'env_id': 'CartPole-v0',
             'trials': 1,
             'max_timesteps': 5,
            },
        ]
    )
    # simple scoring
    benchmark_result = _benchmark_result_helper(benchmark)
    _assert_benchmark_result(benchmark_result, score=0.01)

    # test a successful run
    benchmark_result = _benchmark_result_helper(benchmark, episode_rewards=[100, 100], episode_lengths=[1, 1])
    _assert_benchmark_result(benchmark_result, score=1.0, solves=True)
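
_assert_benchmark_result is another helper missing from these excerpts; its real signature (only partially visible in Example #9) takes further keyword arguments. A minimal sketch consistent with how it is used here, with everything below being an assumption rather than the original gym test code:

import numpy as np

def _assert_benchmark_result(result, score=None, solves=None):
    # Hypothetical assertion helper mirroring the inline asserts used in the
    # neighbouring examples: check the first task's score and its solve flags.
    if score is not None:
        assert np.all(np.isclose(result['scores'][0], score)), \
            "benchmark_result={}".format(result)
    if solves is not None:
        assert bool(np.all(result['solves'][0])) == solves, \
            "benchmark_result={}".format(result)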
Example #7
def test_clip_average_evaluation_not_enough_rewards():
    benchmark = registration.Benchmark(
        id='TestBenchmark-v0',
        scorer=scoring.ClipTo01ThenAverage(num_episodes=2),
        tasks=[
            {
                'env_id': 'CartPole-v0',
                'trials': 1,
                'max_timesteps': 5,
            },
        ])
    # simple scoring
    benchmark_result = _benchmark_result_helper(benchmark)
    _assert_evaluation_result(
        benchmark_result,
        score=0.005,
        rewards=[np.array([1, 0])],
        lengths=[np.array([1, 0])],
    )
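
The expected values in these tests are consistent with ClipTo01ThenAverage clipping each episode's reward into the task's [reward_floor, reward_ceiling] range, rescaling it to [0, 1], and averaging over num_episodes slots, with missing episodes counting as zero. A small illustration of that arithmetic (the floor of 0.0 and ceiling of 100.0 for CartPole-v0 are assumptions inferred from the asserted scores):

reward_floor, reward_ceiling = 0.0, 100.0   # assumed CartPole-v0 task bounds
num_episodes = 2                            # the scorer averages over this many slots
episode_rewards = [1.0]                     # only one episode was recorded

clipped = [min(max((r - reward_floor) / (reward_ceiling - reward_floor), 0.0), 1.0)
           for r in episode_rewards]
clipped += [0.0] * (num_episodes - len(clipped))  # missing episodes count as 0

score = sum(clipped) / num_episodes
print(score)  # 0.005, the value asserted in this example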
Example #8
def test_clip_scoring():
    benchmark = registration.Benchmark(
        id='TestBenchmark-v0',
        scorer=scoring.ClipTo01ThenAverage(num_episodes=1),
        tasks=[
            {
                'env_id': 'CartPole-v0',
                'trials': 1,
                'max_timesteps': 5,
            },
        ])
    # simple scoring
    benchmark_result = _benchmark_result_helper(benchmark)
    assert _is_close(benchmark_result['scores'][0],
                     0.01), "benchmark_result={}".format(benchmark_result)

    # test a successful run
    benchmark_result = _benchmark_result_helper(benchmark,
                                                episode_rewards=[100])
    assert _is_close(benchmark_result['scores'][0],
                     1.0), "benchmark_result={}".format(benchmark_result)
    assert np.all(benchmark_result['solves'][0]), "benchmark_result={}".format(
        benchmark_result)
Example #9
import numpy as np
from collections import defaultdict
from gym.benchmarks import registration, scoring

import gym
gym.undo_logger_setup()

benchmark = registration.Benchmark(id='TestBenchmark-v0',
                                   scorer=scoring.ClipTo01ThenAverage(),
                                   tasks=[
                                       {
                                           'env_id': 'CartPole-v0',
                                           'trials': 1,
                                           'max_timesteps': 100,
                                       },
                                       {
                                           'env_id': 'Pendulum-v0',
                                           'trials': 1,
                                           'max_timesteps': 100,
                                       },
                                   ])


def _is_close(x, target):
    return np.all(np.isclose(x, target))


def _assert_benchmark_result(result,
                             score=None,
                             solves=None,
                             summed_training_seconds=None,
Example #10
                       'max_timesteps': int(4e7),
                       'reward_floor': 2047.2,
                       'reward_ceiling': 5000.0,
                   }, {
                       'env_id': 'VentureNoFrameskip-v4',
                       'trials': 2,
                       'max_timesteps': int(4e7),
                       'reward_floor': 18.0,
                       'reward_ceiling': 100.0,
                   }])

register_benchmark(id='ClassicControl2-v0',
                   name='ClassicControl2',
                   view_group="Control",
                   description='Simple classic control benchmark',
                   scorer=scoring.ClipTo01ThenAverage(),
                   tasks=[
                       {
                           'env_id': 'CartPole-v0',
                           'trials': 1,
                           'max_timesteps': 2000,
                       },
                       {
                           'env_id': 'Pendulum-v0',
                           'trials': 1,
                           'max_timesteps': 1000,
                       },
                   ])

register_benchmark(id='ClassicControl-v0',
                   name='ClassicControl',