def test_run(self):
    """Smoke test: play sampled actions until the environment reports done.

    The board is rendered before every step. Nothing is asserted; the test
    passes as long as no call raises.
    """
    hand_size = 4
    player_count = 4
    game = Hanabi(agents=list('1234'), hand_size=hand_size)

    while True:
        game.render()
        move = Actions.sample(
            hand_size=hand_size,
            players=player_count,
            hints=game.info['hints'],
        )
        # step() is unpacked as (obs, reward, done, info); only `done` is used.
        _, _, finished, _ = game.step(move)
        if finished:
            break
def test_speed_runs(self):
    """Play 1000 episodes of sampled actions on one environment instance.

    The environment is reset at the start of each episode and a running
    total of steps is kept across all episodes. Nothing is asserted.
    """
    hand_size = 4
    player_count = 4
    game = Hanabi(agents=list('1234'), hand_size=hand_size)

    steps = 0
    for episode in range(1, 1_001):
        game.reset()
        reward = 0
        finished = False
        while not finished:
            move = Actions.sample(
                hand_size=hand_size,
                players=player_count,
                hints=game.info['hints'],
            )
            _, reward, finished, _ = game.step(move)
            steps += 1
        # NOTE(review): the original formatting was lost; the progress line
        # is assumed to be printed once per episode -- confirm.
        # '\r' with end='' rewrites the same console line in place.
        print(f"\rEpisode: {episode:4d}/1000, steps: {steps:6d}, reward: {reward}", end='')
from environment import Hanabi
from environment.utils.constants import Actions

if __name__ == '__main__':
    env = Hanabi(agents=list('12'))

    # Play one complete game using sampled actions.
    # NOTE(review): the original formatting was lost; render() is assumed to
    # be called after every step inside the loop -- confirm.
    obs = env.reset()
    done = False
    while not done:
        action = Actions.sample(
            hand_size=env.hand_size,
            players=len(env.players),
            hints=env.hints,
        )
        obs, reward, done, info = env.step(action)
        env.render()

    # Print each entry of the last observation; the 'turns log' entry is a
    # dict and is expanded one turn per line.
    for key, value in obs.items():
        if key != 'turns log':
            print(f"Key: {key}\n{value}", end='\n\n')
        else:
            print(f"Key: {key} (unpacked dict)", end='\n\n')
            for turn, entries in value.items():
                print(f"\t - Turn {turn}: {' '.join(map(str, entries))}")
def test_init(self):
    """Construct a four-agent environment and render it once."""
    game = Hanabi(agents=list('1234'), hand_size=4)
    game.render()
def test_action_sample(self):
    """Apply one sampled action, rendering the board before and after."""
    hand_size = 4
    player_count = 4
    game = Hanabi(agents=list('1234'), hand_size=hand_size)
    game.render()

    move = Actions.sample(
        hand_size=hand_size,
        players=player_count,
        hints=game.info['hints'],
    )
    game.step(move)
    game.render()
def test_action_discard(self):
    """Apply Actions.DISCARD(1), rendering the board before and after."""
    game = Hanabi(agents=list('1234'), hand_size=4)
    game.render()

    move = Actions.DISCARD(1)
    game.step(move)
    game.render()
def test_action_inform_rank(self):
    """Apply Actions.INFORM_RANK(2, 3), rendering the board before and after."""
    game = Hanabi(agents=list('1234'), hand_size=4)
    game.render()

    move = Actions.INFORM_RANK(2, 3)
    game.step(move)
    game.render()
def test_action_inform_color(self):
    """Apply Actions.INFORM_COLOR(2, 3), rendering the board before and after."""
    game = Hanabi(agents=list('1234'), hand_size=4)
    game.render()

    move = Actions.INFORM_COLOR(2, 3)
    game.step(move)
    game.render()
def test_action_play(self):
    """Apply Actions.PLAY(1, 2), rendering the board before and after."""
    game = Hanabi(agents=list('1234'), hand_size=4)
    game.render()

    move = Actions.PLAY(1, 2)
    game.step(move)
    game.render()