Python make_env示例

编程语言: Python

命名空间/包名称: rusp.env_indirect_reciprocity

方法/功能: make_env

hotexamples.com的示例: 4

Python make_env - 已找到4个示例。以下是从开源项目中提取的、评分最高的 rusp.env_indirect_reciprocity.make_env 真实 Python 示例。您可以对示例进行评分，以帮助我们提高示例质量。

示例#1

显示文件

def test_last_always_plays():
    """Check the ``youre_playing_self`` flag at ``[-1, 0]`` is always set.

    Builds the environment with ``last_agent_always_plays=True`` and asserts
    that ``obs['youre_playing_self'][-1, 0]`` is truthy after every reset and
    after each of 1000 steps driven by one fixed random action.
    """
    env = make_env(last_agent_always_plays=True)

    def last_entry_is_set(observation):
        # Index [-1, 0] presumably addresses the last agent's own flag --
        # TODO confirm against the environment's observation layout.
        return observation['youre_playing_self'][-1, 0]

    first_obs = env.reset()
    assert last_entry_is_set(first_obs)

    # The same randomly drawn action is replayed on every step.
    n_actors = env.metadata['n_actors']
    action = {'action_defect': np.random.randint(0, 2, size=n_actors)}

    for _ in range(1000):
        step_obs, _reward, episode_over, _info = env.step(action)
        assert last_entry_is_set(step_obs)

        if episode_over:
            # The flag must also hold on the first observation of a new episode.
            fresh_obs = env.reset()
            assert last_entry_is_set(fresh_obs)

示例#2

显示文件

def test_last_first_versus_last():
    """Check which ``youre_playing_self`` flags are set right before ``done``.

    Builds the environment with ``last_step_first_agent_vs_last_agent=True``
    and steps it 1000 times with one fixed random action. Whenever a step
    reports ``done``, the observation seen *before* that step must have the
    ``youre_playing_self`` entries at ``[-1, 0]`` and ``[0, 0]`` set.
    """
    env = make_env(last_step_first_agent_vs_last_agent=True)
    previous = env.reset()
    n_actors = env.metadata['n_actors']
    action = {'action_defect': np.random.randint(0, 2, size=n_actors)}

    for _ in range(1000):
        current, _reward, finished, _info = env.step(action)

        if finished:
            # `previous` is the observation that fed the episode's final step.
            playing_flags = previous['youre_playing_self']
            assert playing_flags[-1, 0]
            assert playing_flags[0, 0]
            current = env.reset()

        # Snapshot the observation so later steps cannot alter it in place.
        previous = deepcopy(current)

示例#3

显示文件

def test_last_doesnt_play_until():
    """Check the ``[-1, 0]`` flag stays unset for the first 5 steps of an episode.

    Builds the environment with ``last_doesnt_play_until_t=5`` and, over 1000
    steps with one fixed random action, asserts that
    ``obs['youre_playing_self'][-1, 0]`` is falsy whenever fewer than 5 steps
    have been taken in the current episode.
    """
    first_play_step = 5
    env = make_env(last_doesnt_play_until_t=first_play_step)
    action = {'action_defect': np.random.randint(0, 2, size=env.metadata['n_actors'])}
    observation = env.reset()
    steps_in_episode = 0

    for _ in range(1000):
        if steps_in_episode < first_play_step:
            assert not observation['youre_playing_self'][-1, 0]

        observation, _reward, episode_over, _info = env.step(action)
        steps_in_episode += 1

        if episode_over:
            # A new episode restarts the per-episode step counter.
            observation = env.reset()
            steps_in_episode = 0

示例#4

显示文件

def _test_fixed_policy(against_all_d=False, against_all_c=False):
    """Check per-agent rewards when the environment uses a fixed-policy flag.

    The environment is built with the given ``against_all_d`` /
    ``against_all_c`` flags plus ``last_agent_always_plays=True`` and stepped
    1000 times with freshly drawn random actions. On each step, for agents
    flagged as playing in the previous observation:

    * ``against_all_d``: reward is -2 for ``action_defect == 0`` and 0 for
      ``action_defect == 1``;
    * ``against_all_c``: reward is 2 for ``action_defect == 0`` and 4 for
      ``action_defect == 1``.

    Agents not flagged as playing must receive 0. If neither flag is set the
    helper fails on the first step; if both are set, ``against_all_d`` wins.

    Args:
        against_all_d: Forwarded to ``make_env``; selects the -2 / 0 payoffs.
        against_all_c: Forwarded to ``make_env``; selects the 2 / 4 payoffs.
    """
    env = make_env(against_all_d=against_all_d, against_all_c=against_all_c,
                   last_agent_always_plays=True)
    last_obs = env.reset()

    for _ in range(1000):
        # Used below as a mask over `rew`; assumes a boolean array -- TODO confirm.
        playing = np.squeeze(last_obs['youre_playing_self'])
        action = {'action_defect': np.random.randint(0, 2, size=env.metadata['n_actors'])}

        next_obs, rew, done, _info = env.step(action)

        if against_all_d:
            reward_if_zero, reward_if_one = -2, 0
        elif against_all_c:
            reward_if_zero, reward_if_one = 2, 4
        else:
            # The helper is only meaningful with one of the two flags enabled.
            assert False

        chose_zero = action['action_defect'] == 0
        chose_one = action['action_defect'] == 1
        assert np.all(rew[playing & chose_zero] == reward_if_zero)
        assert np.all(rew[playing & chose_one] == reward_if_one)
        assert np.all(rew[~playing] == 0)

        # Carry forward the observation the next step's mask is derived from.
        last_obs = env.reset() if done else next_obs