示例#1
0
 def test_run(self):
     """Play one game with randomly sampled actions, rendering before each step.

     Smoke test: nothing is asserted, the game only has to run until
     ``step`` reports it is done. The environment is used directly after
     construction, without an explicit ``reset()``.
     """
     game = Hanabi(agents=list('1234'), hand_size=4)
     while True:
         game.render()
         move = Actions.sample(hand_size=4, players=4, hints=game.info['hints'])
         # Only the done flag matters here; the other step outputs are ignored.
         _obs, _reward, finished, _info = game.step(move)
         if finished:
             break
示例#2
0
 def test_speed_runs(self):
     """Run 1000 random-action episodes on one environment, printing progress.

     Smoke/throughput test: nothing is asserted. After each episode a
     carriage-return progress line is printed showing the episode number,
     the step count accumulated over all episodes so far, and the reward
     returned by the episode's final step.
     """
     game = Hanabi(agents=list('1234'), hand_size=4)
     total_steps = 0  # cumulative across episodes, never reset
     for episode in range(1, 1_001):
         game.reset()
         while True:
             move = Actions.sample(hand_size=4, players=4, hints=game.info['hints'])
             _obs, reward, finished, _info = game.step(move)
             total_steps += 1
             if finished:
                 break
         print(f"\rEpisode: {episode:4d}/1000, steps: {total_steps:6d}, reward: {reward}", end='')
示例#3
0
from environment import Hanabi
from environment.utils.constants import Actions

if __name__ == '__main__':
    # Demo script: play one two-agent game with random actions, then dump
    # the last observation returned by the environment.
    env = Hanabi(agents=list('12'))

    # Run a single game loop. Each action is sampled using the environment's
    # current hand size, player count and hint state.
    obs = env.reset()
    done = False
    while not done:
        action = Actions.sample(hand_size=env.hand_size, players=len(env.players), hints=env.hints)
        # Reward and info are not used by this demo.
        obs, _reward, done, _info = env.step(action)
        env.render()

    # Unpack the final observation, one key at a time.
    for key, value in obs.items():
        if key == 'turns log':
            # The turns log is itself a mapping, so print one line per turn.
            print(f"Key: {key} (unpacked dict)", end='\n\n')
            # Use distinct names here so the outer loop's `value` is not rebound.
            for turn, entries in value.items():
                print(f"\t - Turn {turn}: {' '.join(map(str, entries))}")
            continue
        print(f"Key: {key}\n{value}", end='\n\n')
示例#4
0
 def test_init(self):
     """Construct an environment with four agents and hand size 4, then render it.

     Smoke test: nothing is asserted; construction and rendering only have
     to complete without raising.
     """
     game = Hanabi(hand_size=4, agents=list('1234'))
     game.render()
示例#5
0
 def test_action_sample(self):
     """Apply one randomly sampled action, rendering before and after.

     Smoke test: nothing is asserted about the resulting state.
     """
     game = Hanabi(hand_size=4, agents=list('1234'))
     game.render()
     move = Actions.sample(hand_size=4, players=4, hints=game.info['hints'])
     game.step(move)
     game.render()
示例#6
0
 def test_action_discard(self):
     """Apply a single ``Actions.DISCARD(1)`` step, rendering before and after.

     Smoke test: nothing is asserted about the resulting state.
     """
     game = Hanabi(hand_size=4, agents=list('1234'))
     game.render()
     # NOTE(review): the argument is presumably a card position in the hand; confirm.
     move = Actions.DISCARD(1)
     game.step(move)
     game.render()
示例#7
0
 def test_action_inform_rank(self):
     """Apply a single ``Actions.INFORM_RANK(2, 3)`` step, rendering before and after.

     Smoke test: nothing is asserted about the resulting state.
     """
     game = Hanabi(hand_size=4, agents=list('1234'))
     game.render()
     # NOTE(review): the meaning of the two arguments is not visible here; confirm against Actions.
     move = Actions.INFORM_RANK(2, 3)
     game.step(move)
     game.render()
示例#8
0
 def test_action_inform_color(self):
     """Apply a single ``Actions.INFORM_COLOR(2, 3)`` step, rendering before and after.

     Smoke test: nothing is asserted about the resulting state.
     """
     game = Hanabi(hand_size=4, agents=list('1234'))
     game.render()
     # NOTE(review): the meaning of the two arguments is not visible here; confirm against Actions.
     move = Actions.INFORM_COLOR(2, 3)
     game.step(move)
     game.render()
示例#9
0
 def test_action_play(self):
     """Apply a single ``Actions.PLAY(1, 2)`` step, rendering before and after.

     Smoke test: nothing is asserted about the resulting state.
     """
     game = Hanabi(hand_size=4, agents=list('1234'))
     game.render()
     # NOTE(review): the meaning of the two arguments is not visible here; confirm against Actions.
     move = Actions.PLAY(1, 2)
     game.step(move)
     game.render()