示例#1
0
def test_get_reward(base_env: BaseEnv):
    """Check the per-step reward and the value reported by ``get_reward``.

    After a reset, a single ``step(4)`` is expected to yield a reward of
    -0.01. The state's ``score_item`` list is then overwritten and
    ``get_reward()`` is expected to return 30.
    """
    base_env.reset()

    # Only the reward slot of the 4-tuple returned by step() matters here.
    _, step_reward, _, _ = base_env.step(4)
    assert step_reward == -0.01

    # NOTE(review): how get_reward() derives 30 from these entries is not
    # visible in this file -- confirm against the environment implementation.
    base_env.state.score_item = [10, 200, -500, 500]
    assert base_env.get_reward() == 30
示例#2
0
def test_step(base_env: BaseEnv):
    """Drive the environment through a fixed action sequence.

    Verifies the reward returned by each ``step`` call, the bomberman's
    reported direction after each ``step(2)``, and that the episode is
    flagged as done once a bomb with ``countdown == 1`` has been added
    to the state and one more step is taken.
    """
    base_env.reset()

    # First action: small positive reward, episode still running.
    _, reward, done, _ = base_env.step(5)
    assert reward == 0.01
    assert not done

    # Three consecutive step(2) calls: same reward and direction each time.
    for _repeat in range(3):
        _, reward, _, _ = base_env.step(2)
        assert reward == -0.01
        assert base_env.state.get_bomberman().get_direction() == 'East'

    # Two step(4) calls in a row produce different rewards.
    _, reward, _, _ = base_env.step(4)
    assert reward == -0.01

    _, reward, _, _ = base_env.step(4)
    assert reward == 10

    # Add a bomb at (3, 3) with its countdown set to 1 before the last step.
    primed_bomb = Bomb((3, 3))
    primed_bomb.countdown = 1
    base_env.state.get_bombs().append(primed_bomb)

    _, reward, done, _ = base_env.step(4)
    assert reward == 80
    assert done