def main(unused_argv):
    """Play ``FLAGS.num_eval`` Go episodes between two random agents.

    For every episode the environment is reset and the agent whose index
    matches the ``"current_player"`` observation picks the next action until
    the time step reports that the episode is over. The first entry of
    ``"info_state"`` is printed after each move, and the rewards together with
    the final board are printed once the episode has finished. The total wall
    clock time is printed at the end.

    Args:
        unused_argv: Positional command-line arguments; ignored.
    """
    begin = time.time()
    env = Go()
    agents = [RandomAgent(idx) for idx in range(2)]

    for _ in range(FLAGS.num_eval):
        time_step = env.reset()
        while not time_step.last():
            # Both players are driven the same way, so the acting agent is
            # simply looked up by the current player's index.
            player_id = time_step.observations["current_player"]
            agent_output = agents[player_id].step(time_step)
            action_list = agent_output.action
            time_step = env.step(action_list)
            print(time_step.observations["info_state"][0])

        # Episode is over, step all agents with final info state.
        for agent in agents:
            agent.step(time_step)
        print(time_step.rewards, env.get_current_board())

    print('Time elapsed:', time.time() - begin)
import random
import time

# import os
#
# os.environ['BOARD_SIZE'] = '7'
from environment.GoEnv import Go

if __name__ == '__main__':
    # Smoke test: play 10 games where every move is drawn at random from the
    # array returned by env.get_all_legal_moves(), printing the chosen move
    # and the board after each step, then report the total elapsed time.
    start_time = time.time()
    env = Go()
    for _game in range(10):
        # The value returned by reset() (a go.Position object according to
        # the original note) is not needed here.
        env.reset()
        done = False
        while not done:
            # presumably a non-negative weight/mask per move index — confirm
            legal_moves = env.get_all_legal_moves()
            # Normalise to probabilities, then accumulate into a CDF so a
            # single uniform draw can be mapped back to a move index.
            cdf = (legal_moves / sum(legal_moves)).cumsum()
            move = cdf.searchsorted(random.random())
            _state, done, _reward, _info = env.step(move)
            print(move, env.get_current_board())
    print('Time elapsed:', time.time() - start_time)