def test_max_seconds():
    """Episodes finishing after a task's `max_seconds` budget are dropped."""
    benchmark = registration.Benchmark(
        id='TestBenchmark-v0',
        scorer=scoring.TotalReward(),
        tasks=[{
            'env_id': 'CartPole-v0',
            'trials': 1,
            'max_seconds': 1,
        }],
    )

    # Both episodes complete within the wall-clock budget, so both count.
    result = _benchmark_result_helper(
        benchmark,
        data_sources=[0, 0],
        episode_lengths=[100, 100],
        episode_rewards=[0, 100],
        episode_types=['t', 't'],
        timestamps=[1.5, 2],
    )
    _assert_benchmark_result(result, score=0.5)

    # Only the first episode (timestamp 2) finishes in time; the later
    # episodes at timestamps 102 and 202 must be excluded from the score.
    result = _benchmark_result_helper(
        benchmark,
        data_sources=[0, 0, 0],
        episode_lengths=[100, 100, 100],
        episode_rewards=[0, 100, 100],
        episode_types=['t', 't', 't'],
        timestamps=[2, 102, 202],
    )
    _assert_benchmark_result(result, score=0.0)
def test_clip_scoring():
    """Score a single CartPole task capped at `max_timesteps` with TotalReward.

    Fix: lines following the "successful run" assertion were a stray
    duplicated tail of `test_max_seconds` (a dangling `timestamps=[1.5, 2])`
    continuation with no enclosing call), which made the module a
    SyntaxError. The duplicate has been removed; the test ends after the
    solved-run assertion.
    """
    benchmark = registration.Benchmark(id='TestBenchmark-v0',
                                       scorer=scoring.TotalReward(),
                                       tasks=[
                                           {
                                               'env_id': 'CartPole-v0',
                                               'trials': 1,
                                               'max_timesteps': 5,
                                           },
                                       ])
    # simple scoring: helper defaults yield the minimal non-zero score
    benchmark_result = _benchmark_result_helper(benchmark)
    _assert_benchmark_result(benchmark_result, score=0.01)

    # test a successful run: full reward should score 1.0 and mark the
    # task as solved
    benchmark_result = _benchmark_result_helper(benchmark,
                                                episode_rewards=[100])
    _assert_benchmark_result(benchmark_result, score=1.0, solves=True)


# Two-task benchmark (CartPole + Pendulum, one trial each, 5 timesteps max)
# shared by the reward-scoring tests below.
reward_benchmark = registration.Benchmark(
    id='TestBenchmark-v0',
    scorer=scoring.TotalReward(),
    tasks=[
        {'env_id': 'CartPole-v0', 'trials': 1, 'max_timesteps': 5},
        {'env_id': 'Pendulum-v0', 'trials': 1, 'max_timesteps': 5},
    ],
)


def test_total_reward_evaluation_scoring():